diff --git a/Gruntfile.js b/Gruntfile.js index 7f004ab468ec..f151bad399df 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -24,7 +24,10 @@ module.exports = function (grunt) { pkg: grunt.file.readJSON('package.json'), clean: { - out: ['gen/', 'out/', 'out-wpt/', 'out-node/'], + gen: ['gen/'], + out: ['out/'], + 'out-wpt': ['out-wpt/'], + 'out-node': ['out-node/'], }, run: { @@ -246,17 +249,20 @@ module.exports = function (grunt) { }); grunt.registerTask('generate-common', 'Generate files into gen/ and src/', [ + 'clean:gen', 'run:generate-version', 'run:generate-listings-and-webworkers', 'run:generate-cache', ]); grunt.registerTask('build-standalone', 'Build out/ (no checks; run after generate-common)', [ + 'clean:out', 'run:build-out', 'run:copy-assets', 'copy:gen-to-out', 'copy:htmlfiles-to-out', ]); grunt.registerTask('build-wpt', 'Build out-wpt/ (no checks; run after generate-common)', [ + 'clean:out-wpt', 'run:build-out-wpt', 'run:copy-assets-wpt', 'copy:gen-to-out-wpt', @@ -265,6 +271,7 @@ module.exports = function (grunt) { 'run:autoformat-out-wpt', ]); grunt.registerTask('build-node', 'Build out-node/ (no checks; run after generate-common)', [ + 'clean:out-node', 'run:build-out-node', 'run:copy-assets-node', ]); @@ -282,7 +289,6 @@ module.exports = function (grunt) { grunt.registerTask('pre', ['all']); registerTaskAndAddToHelp('all', 'Run all builds and checks', [ - 'clean', 'generate-common', 'concurrent:all-builds-and-checks', ]); diff --git a/package-lock.json b/package-lock.json index 4837e5c70485..cc4e97a2ce64 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.49", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", @@ -1539,9 +1539,9 @@ "dev": true }, "node_modules/@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.49", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.49.tgz", + "integrity": "sha512-NMmS8/DofhH/IFeW+876XrHVWel+J/vdcFCHLDqeJgkH9x0DeiwjVd8LcBdaxdG/T7Rf8VUAYsA8X1efMzLjRQ==", "dev": true }, "node_modules/abbrev": { @@ -10076,9 +10076,9 @@ "dev": true }, "@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.49", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.49.tgz", + "integrity": "sha512-NMmS8/DofhH/IFeW+876XrHVWel+J/vdcFCHLDqeJgkH9x0DeiwjVd8LcBdaxdG/T7Rf8VUAYsA8X1efMzLjRQ==", "dev": true }, "abbrev": { diff --git a/package.json b/package.json index 9d311579c314..3ef62315db25 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.49", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", diff --git a/src/common/framework/test_config.ts b/src/common/framework/test_config.ts index e6624ae12014..072aaf736027 100644 --- 
a/src/common/framework/test_config.ts +++ b/src/common/framework/test_config.ts @@ -4,8 +4,20 @@ export type TestConfig = { */ enableDebugLogs: boolean; + /** + * Maximum number of subcases in flight at once, within a case. Once this many + * are in flight, wait for a subcase to finish before starting the next one. + */ maxSubcasesInFlight: number; + + /** + * Every `subcasesBetweenAttemptingGC` subcases, run `attemptGarbageCollection()`. + * Setting to `Infinity` disables this. Setting to 1 attempts GC every time (slow!). + */ + subcasesBetweenAttemptingGC: number; + testHeartbeatCallback: () => void; + noRaceWithRejectOnTimeout: boolean; /** @@ -40,7 +52,8 @@ export type TestConfig = { export const globalTestConfig: TestConfig = { enableDebugLogs: false, - maxSubcasesInFlight: 500, + maxSubcasesInFlight: 100, + subcasesBetweenAttemptingGC: 5000, testHeartbeatCallback: () => {}, noRaceWithRejectOnTimeout: false, unrollConstEvalLoops: false, diff --git a/src/common/internal/logging/test_case_recorder.ts b/src/common/internal/logging/test_case_recorder.ts index 78f625269e3d..eb03f4ea96f7 100644 --- a/src/common/internal/logging/test_case_recorder.ts +++ b/src/common/internal/logging/test_case_recorder.ts @@ -44,7 +44,7 @@ export class TestCaseRecorder { private startTime = -1; private logs: LogMessageWithStack[] = []; private logLinesAtCurrentSeverity = 0; - private debugging = false; + public debugging = false; constructor(result: LiveTestCaseResult, debugging: boolean) { this.result = result; diff --git a/src/common/internal/test_group.ts b/src/common/internal/test_group.ts index e1d0cde12d5c..ac3b11082771 100644 --- a/src/common/internal/test_group.ts +++ b/src/common/internal/test_group.ts @@ -31,6 +31,7 @@ import { stringifyPublicParamsUniquely, } from '../internal/query/stringify_params.js'; import { validQueryPart } from '../internal/query/validQueryPart.js'; +import { attemptGarbageCollection } from '../util/collect_garbage.js'; import { DeepReadonly } from '../util/types.js'; import { assert, unreachable } from '../util/util.js'; @@ -620,7 +621,7 @@ class RunCaseSpecific implements RunCase { const subcasePrefix = 'subcase: ' + stringifyPublicParams(subParams); const subRec = new Proxy(rec, { get: (target, k: keyof TestCaseRecorder) => { - const prop = TestCaseRecorder.prototype[k]; + const prop = rec[k] ?? TestCaseRecorder.prototype[k]; if (typeof prop === 'function') { testHeartbeatCallback(); return function (...args: Parameters) { @@ -696,6 +697,7 @@ class RunCaseSpecific implements RunCase { subRec.threw(ex); } }) + .finally(attemptGarbageCollectionIfDue) .finally(subcaseFinishedCallback); allPreviousSubcasesFinalizedPromise = allPreviousSubcasesFinalizedPromise.then( @@ -711,13 +713,17 @@ class RunCaseSpecific implements RunCase { rec.skipped(new SkipTestCase('all subcases were skipped')); } } else { - await this.runTest( - rec, - sharedState, - this.params, - /* throwSkip */ false, - getExpectedStatus(selfQuery) - ); + try { + await this.runTest( + rec, + sharedState, + this.params, + /* throwSkip */ false, + getExpectedStatus(selfQuery) + ); + } finally { + await attemptGarbageCollectionIfDue(); + } } } finally { testHeartbeatCallback(); @@ -754,3 +760,17 @@ export type CaseTimingLogLine = { */ nonskippedSubcaseCount: number; }; + +/** Every `subcasesBetweenAttemptingGC` calls to this function will `attemptGarbageCollection()`. */ +const attemptGarbageCollectionIfDue: () => Promise = (() => { + // This state is global because garbage is global. 
+ let subcasesSinceLastGC = 0; + + return async function attemptGarbageCollectionIfDue() { + subcasesSinceLastGC++; + if (subcasesSinceLastGC >= globalTestConfig.subcasesBetweenAttemptingGC) { + subcasesSinceLastGC = 0; + return attemptGarbageCollection(); + } + }; +})(); diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts index 932c5668b587..0305031cc790 100644 --- a/src/common/runtime/standalone.ts +++ b/src/common/runtime/standalone.ts @@ -369,6 +369,9 @@ function makeSubtreeChildrenHTML( const runMySubtree = async () => { const results: SubtreeResult[] = []; for (const { runSubtree } of childFns) { + if (stopRequested) { + break; + } results.push(await runSubtree()); } return mergeSubtreeResults(...results); diff --git a/src/common/tools/dev_server.ts b/src/common/tools/dev_server.ts index 1d1313e4f51c..8d78855974d6 100644 --- a/src/common/tools/dev_server.ts +++ b/src/common/tools/dev_server.ts @@ -106,10 +106,6 @@ const app = express(); // Send Chrome Origin Trial tokens app.use((_req, res, next) => { - res.header('Origin-Trial', [ - // Token for http://localhost:8080 - 'AvyDIV+RJoYs8fn3W6kIrBhWw0te0klraoz04mw/nPb8VTus3w5HCdy+vXqsSzomIH745CT6B5j1naHgWqt/tw8AAABJeyJvcmlnaW4iOiJodHRwOi8vbG9jYWxob3N0OjgwODAiLCJmZWF0dXJlIjoiV2ViR1BVIiwiZXhwaXJ5IjoxNjYzNzE4Mzk5fQ==', - ]); next(); }); diff --git a/src/common/tools/gen_wpt_cts_html.ts b/src/common/tools/gen_wpt_cts_html.ts index 46c2ae435491..35eac195b33c 100644 --- a/src/common/tools/gen_wpt_cts_html.ts +++ b/src/common/tools/gen_wpt_cts_html.ts @@ -9,6 +9,8 @@ import { } from '../internal/query/query.js'; import { assert } from '../util/util.js'; +const kMaxQueryLength = 184; + function printUsageAndExit(rc: number): never { console.error(`\ Usage (simple, for webgpu:* suite only): @@ -193,6 +195,7 @@ let config: Config; const loader = new DefaultTestFileLoader(); const lines = []; + const tooLongQueries = []; for (const prefix of config.argumentsPrefixes) { const rootQuery = new TestQueryMultiFile(config.suite, []); const tree = await loader.loadTree(rootQuery, { @@ -219,15 +222,9 @@ let config: Config; // Check for a safe-ish path length limit. Filename must be <= 255, and on Windows the whole // path must be <= 259. Leave room for e.g.: // 'c:\b\s\w\xxxxxxxx\layout-test-results\external\wpt\webgpu\cts_worker=0_q=...-actual.txt' - assert( - queryString.length < 185, - `Generated test variant would produce too-long -actual.txt filename. Possible solutions: -- Reduce the length of the parts of the test query -- Reduce the parameterization of the test -- Make the test function faster and regenerate the listing_meta entry -- Reduce the specificity of test expectations (if you're using them) -${queryString}` - ); + if (queryString.length > kMaxQueryLength) { + tooLongQueries.push(queryString); + } } lines.push({ @@ -243,6 +240,29 @@ ${queryString}` } prefixComment.comment += `; ${variantCount} variants generated from ${testsSeen.size} tests in ${filesSeen.size} files`; } + + if (tooLongQueries.length > 0) { + // Try to show some representation of failures. We show one entry from each + // test that is different length. Without this the logger cuts off the error + // messages and you end up not being told about which tests have issues. 
+ const queryStrings = new Map(); + tooLongQueries.forEach(s => { + const colonNdx = s.lastIndexOf(':'); + const prefix = s.substring(0, colonNdx + 1); + const id = `${prefix}:${s.length}`; + queryStrings.set(id, s); + }); + throw new Error( + `Generated test variant would produce too-long -actual.txt filename. Possible solutions: + - Reduce the length of the parts of the test query + - Reduce the parameterization of the test + - Make the test function faster and regenerate the listing_meta entry + - Reduce the specificity of test expectations (if you're using them) +|<${''.padEnd(kMaxQueryLength - 4, '-')}>| +${[...queryStrings.values()].join('\n')}` + ); + } + await generateFile(lines); })().catch(ex => { console.log(ex.stack ?? ex.toString()); diff --git a/src/common/util/navigator_gpu.ts b/src/common/util/navigator_gpu.ts index 6f3a423db39f..4e58797097ed 100644 --- a/src/common/util/navigator_gpu.ts +++ b/src/common/util/navigator_gpu.ts @@ -68,12 +68,11 @@ export function getGPU(recorder: TestCaseRecorder | null): GPU { ): Promise { const promise = oldFn.call(this, { ...defaultRequestAdapterOptions, ...options }); if (recorder) { - void promise.then(async adapter => { + void promise.then(adapter => { if (adapter) { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. - const info = adapter.info || (await adapter.requestAdapterInfo()); - const infoString = `Adapter: ${info.vendor} / ${info.architecture} / ${info.device}`; - recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo: info }))); + const adapterInfo = adapter.info; + const infoString = `Adapter: ${adapterInfo.vendor} / ${adapterInfo.architecture} / ${adapterInfo.device}`; + recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo }))); } }); } diff --git a/src/resources/cache/hashes.json b/src/resources/cache/hashes.json index e0459422560f..e2224325944f 100644 --- a/src/resources/cache/hashes.json +++ b/src/resources/cache/hashes.json @@ -1,112 +1,112 @@ { - "webgpu/shader/execution/binary/af_addition.bin": "338b5b67", - "webgpu/shader/execution/binary/af_logical.bin": "3b2aceb8", - "webgpu/shader/execution/binary/af_division.bin": "a77dc4c0", - "webgpu/shader/execution/binary/af_matrix_addition.bin": "136a7fbb", - "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "90f2c731", - "webgpu/shader/execution/binary/af_multiplication.bin": "35ba40b9", - "webgpu/shader/execution/binary/af_remainder.bin": "41582f85", - "webgpu/shader/execution/binary/af_subtraction.bin": "a41420b2", - "webgpu/shader/execution/binary/f16_addition.bin": "ef10ca66", - "webgpu/shader/execution/binary/f16_logical.bin": "4bf24ca5", - "webgpu/shader/execution/binary/f16_division.bin": "f826b6ba", - "webgpu/shader/execution/binary/f16_matrix_addition.bin": "a910ddb0", - "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "9458671c", - "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "36be05d3", - "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8aa6a88a", - "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "38282a11", - "webgpu/shader/execution/binary/f16_multiplication.bin": "62f91819", - "webgpu/shader/execution/binary/f16_remainder.bin": "f829bb65", - "webgpu/shader/execution/binary/f16_subtraction.bin": "82d4e231", - "webgpu/shader/execution/binary/f32_addition.bin": "9b0a0c50", - "webgpu/shader/execution/binary/f32_logical.bin": "b75af25a", - "webgpu/shader/execution/binary/f32_division.bin": "f6d7832f", - 
"webgpu/shader/execution/binary/f32_matrix_addition.bin": "3317c75b", - "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "c6f990c8", - "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "b091a702", - "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "2d12a16b", - "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "e1217524", - "webgpu/shader/execution/binary/f32_multiplication.bin": "19774fb3", - "webgpu/shader/execution/binary/f32_remainder.bin": "fd94bb9a", - "webgpu/shader/execution/binary/f32_subtraction.bin": "dba7cd7a", - "webgpu/shader/execution/binary/i32_arithmetic.bin": "e3b317e1", - "webgpu/shader/execution/binary/i32_comparison.bin": "63fa9be8", - "webgpu/shader/execution/binary/u32_arithmetic.bin": "e8b4008c", - "webgpu/shader/execution/binary/u32_comparison.bin": "d472fd61", - "webgpu/shader/execution/abs.bin": "631d932d", - "webgpu/shader/execution/acos.bin": "afcafcb1", - "webgpu/shader/execution/acosh.bin": "4b30eb95", - "webgpu/shader/execution/asin.bin": "c850c13d", - "webgpu/shader/execution/asinh.bin": "66a6acc0", - "webgpu/shader/execution/atan.bin": "2aabbb53", - "webgpu/shader/execution/atan2.bin": "82dd926a", - "webgpu/shader/execution/atanh.bin": "b98c937c", - "webgpu/shader/execution/bitcast.bin": "5daaee1b", - "webgpu/shader/execution/ceil.bin": "d0c32cf4", - "webgpu/shader/execution/clamp.bin": "4d1fc26a", - "webgpu/shader/execution/cos.bin": "dc837ae2", - "webgpu/shader/execution/cosh.bin": "d9e90580", - "webgpu/shader/execution/cross.bin": "ce7979f", - "webgpu/shader/execution/degrees.bin": "1436a196", - "webgpu/shader/execution/determinant.bin": "f36f1fa1", - "webgpu/shader/execution/distance.bin": "5103f8bd", - "webgpu/shader/execution/dot.bin": "4514172c", - "webgpu/shader/execution/exp.bin": "f41150bd", - "webgpu/shader/execution/exp2.bin": "19c494e", - "webgpu/shader/execution/faceForward.bin": "27b6e4a7", - "webgpu/shader/execution/floor.bin": "5bb5098b", - "webgpu/shader/execution/fma.bin": "daace9a4", - "webgpu/shader/execution/fract.bin": "be5f0334", - "webgpu/shader/execution/frexp.bin": "c9efaf7c", - "webgpu/shader/execution/inverseSqrt.bin": "8a50b907", - "webgpu/shader/execution/ldexp.bin": "cb4cea21", - "webgpu/shader/execution/length.bin": "a1b9fbeb", - "webgpu/shader/execution/log.bin": "9f2eb7c3", - "webgpu/shader/execution/log2.bin": "9ee7d861", - "webgpu/shader/execution/max.bin": "11e4608e", - "webgpu/shader/execution/min.bin": "7a084c44", - "webgpu/shader/execution/mix.bin": "7b892a4f", - "webgpu/shader/execution/modf.bin": "b3bf26d7", - "webgpu/shader/execution/normalize.bin": "18eba01d", - "webgpu/shader/execution/pack2x16float.bin": "82df446e", - "webgpu/shader/execution/pow.bin": "d3a05344", - "webgpu/shader/execution/quantizeToF16.bin": "7793770e", - "webgpu/shader/execution/radians.bin": "582c1f6b", - "webgpu/shader/execution/reflect.bin": "9161d6e5", - "webgpu/shader/execution/refract.bin": "817b59aa", - "webgpu/shader/execution/round.bin": "cb881aa2", - "webgpu/shader/execution/saturate.bin": "3716605e", - "webgpu/shader/execution/sign.bin": "549ac92f", - "webgpu/shader/execution/sin.bin": "5ec5bcb7", - "webgpu/shader/execution/sinh.bin": "62f6b736", - "webgpu/shader/execution/smoothstep.bin": "aa97768", - "webgpu/shader/execution/sqrt.bin": "d0a134ce", - "webgpu/shader/execution/step.bin": "b8035bb9", - "webgpu/shader/execution/tan.bin": "b34366cd", - "webgpu/shader/execution/tanh.bin": "8f5edddc", - "webgpu/shader/execution/transpose.bin": "1aa2de65", - 
"webgpu/shader/execution/trunc.bin": "cf43e3f7", - "webgpu/shader/execution/unpack2x16float.bin": "57ea7c02", - "webgpu/shader/execution/unpack2x16snorm.bin": "17fd3f86", - "webgpu/shader/execution/unpack2x16unorm.bin": "fc68bc4b", - "webgpu/shader/execution/unpack4x8snorm.bin": "fef504c1", - "webgpu/shader/execution/unpack4x8unorm.bin": "e8d8de93", - "webgpu/shader/execution/unary/af_arithmetic.bin": "14c0612a", - "webgpu/shader/execution/unary/af_assignment.bin": "3ad4afc", - "webgpu/shader/execution/unary/bool_conversion.bin": "15f7f3fb", - "webgpu/shader/execution/unary/f16_arithmetic.bin": "4a20db6d", - "webgpu/shader/execution/unary/f16_conversion.bin": "31f72f5a", - "webgpu/shader/execution/unary/f32_arithmetic.bin": "f1c311cb", - "webgpu/shader/execution/unary/f32_conversion.bin": "7539cdb3", - "webgpu/shader/execution/unary/i32_arithmetic.bin": "de945eec", - "webgpu/shader/execution/unary/i32_conversion.bin": "1728a03e", - "webgpu/shader/execution/unary/u32_conversion.bin": "9e6ca0ce", - "webgpu/shader/execution/unary/ai_assignment.bin": "1fd685a2", - "webgpu/shader/execution/binary/ai_arithmetic.bin": "90e651f4", - "webgpu/shader/execution/unary/ai_arithmetic.bin": "ba31d178", - "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "bc8b52ef", - "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "54edf6a2", - "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "43b036b1", - "webgpu/shader/execution/derivatives.bin": "65c15fc3", - "webgpu/shader/execution/fwidth.bin": "cc91c875" + "webgpu/shader/execution/binary/af_addition.bin": "d0c1b760", + "webgpu/shader/execution/binary/af_logical.bin": "ca60ce77", + "webgpu/shader/execution/binary/af_division.bin": "47ae1ca1", + "webgpu/shader/execution/binary/af_matrix_addition.bin": "afaf9bae", + "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "42433bf3", + "webgpu/shader/execution/binary/af_multiplication.bin": "babfc501", + "webgpu/shader/execution/binary/af_remainder.bin": "19995293", + "webgpu/shader/execution/binary/af_subtraction.bin": "62f090b9", + "webgpu/shader/execution/binary/f16_addition.bin": "540ae334", + "webgpu/shader/execution/binary/f16_logical.bin": "c1f09c30", + "webgpu/shader/execution/binary/f16_division.bin": "b4eabc05", + "webgpu/shader/execution/binary/f16_matrix_addition.bin": "6b9113b", + "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "a7362ff1", + "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "4ac4e5bb", + "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "93d4d43a", + "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "beed89d5", + "webgpu/shader/execution/binary/f16_multiplication.bin": "6b5f0d51", + "webgpu/shader/execution/binary/f16_remainder.bin": "a1f499b3", + "webgpu/shader/execution/binary/f16_subtraction.bin": "61a571d5", + "webgpu/shader/execution/binary/f32_addition.bin": "fa6cc596", + "webgpu/shader/execution/binary/f32_logical.bin": "2b155b60", + "webgpu/shader/execution/binary/f32_division.bin": "243c9ce6", + "webgpu/shader/execution/binary/f32_matrix_addition.bin": "d3bc6ed6", + "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "2a4c1527", + "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "d695442", + "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "b306b19", + "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "aac6cbfd", + 
"webgpu/shader/execution/binary/f32_multiplication.bin": "a21303f5", + "webgpu/shader/execution/binary/f32_remainder.bin": "79e462a1", + "webgpu/shader/execution/binary/f32_subtraction.bin": "4e6bbf38", + "webgpu/shader/execution/binary/i32_arithmetic.bin": "167760cc", + "webgpu/shader/execution/binary/i32_comparison.bin": "6a9f856a", + "webgpu/shader/execution/binary/u32_arithmetic.bin": "ac424b44", + "webgpu/shader/execution/binary/u32_comparison.bin": "a9e71302", + "webgpu/shader/execution/abs.bin": "a42729c4", + "webgpu/shader/execution/acos.bin": "664a5662", + "webgpu/shader/execution/acosh.bin": "d3fb8eb0", + "webgpu/shader/execution/asin.bin": "5a4f5b9e", + "webgpu/shader/execution/asinh.bin": "3ce3fe4d", + "webgpu/shader/execution/atan.bin": "759d432", + "webgpu/shader/execution/atan2.bin": "95008607", + "webgpu/shader/execution/atanh.bin": "569bd1b6", + "webgpu/shader/execution/bitcast.bin": "4329e501", + "webgpu/shader/execution/ceil.bin": "55cc76e5", + "webgpu/shader/execution/clamp.bin": "d580a273", + "webgpu/shader/execution/cos.bin": "3107bc4b", + "webgpu/shader/execution/cosh.bin": "d36c86cc", + "webgpu/shader/execution/cross.bin": "e48c39ba", + "webgpu/shader/execution/degrees.bin": "f74b63d2", + "webgpu/shader/execution/determinant.bin": "f07e1160", + "webgpu/shader/execution/distance.bin": "93156a89", + "webgpu/shader/execution/dot.bin": "4e2fe407", + "webgpu/shader/execution/exp.bin": "3b269b18", + "webgpu/shader/execution/exp2.bin": "7aeeeaf6", + "webgpu/shader/execution/faceForward.bin": "451ffbd8", + "webgpu/shader/execution/floor.bin": "37131d74", + "webgpu/shader/execution/fma.bin": "30111350", + "webgpu/shader/execution/fract.bin": "5ef13392", + "webgpu/shader/execution/frexp.bin": "da764bc0", + "webgpu/shader/execution/inverseSqrt.bin": "6ff34703", + "webgpu/shader/execution/ldexp.bin": "5016cec9", + "webgpu/shader/execution/length.bin": "f236d2e7", + "webgpu/shader/execution/log.bin": "1c54f128", + "webgpu/shader/execution/log2.bin": "e44e2370", + "webgpu/shader/execution/max.bin": "eb4c1901", + "webgpu/shader/execution/min.bin": "f8c70a2b", + "webgpu/shader/execution/mix.bin": "df3b3f62", + "webgpu/shader/execution/modf.bin": "b600b26f", + "webgpu/shader/execution/normalize.bin": "7af3a3d2", + "webgpu/shader/execution/pack2x16float.bin": "7c67b10e", + "webgpu/shader/execution/pow.bin": "ee37f4ba", + "webgpu/shader/execution/quantizeToF16.bin": "a7a65754", + "webgpu/shader/execution/radians.bin": "51d423b9", + "webgpu/shader/execution/reflect.bin": "3ba4eda6", + "webgpu/shader/execution/refract.bin": "13fc4914", + "webgpu/shader/execution/round.bin": "9155b88b", + "webgpu/shader/execution/saturate.bin": "73cecf71", + "webgpu/shader/execution/sign.bin": "68d61a83", + "webgpu/shader/execution/sin.bin": "44219876", + "webgpu/shader/execution/sinh.bin": "158d261d", + "webgpu/shader/execution/smoothstep.bin": "7129c56b", + "webgpu/shader/execution/sqrt.bin": "9aaaf8aa", + "webgpu/shader/execution/step.bin": "85858027", + "webgpu/shader/execution/tan.bin": "dbbda634", + "webgpu/shader/execution/tanh.bin": "8c540d5c", + "webgpu/shader/execution/transpose.bin": "a676fc9a", + "webgpu/shader/execution/trunc.bin": "35ab398d", + "webgpu/shader/execution/unpack2x16float.bin": "eb9294c9", + "webgpu/shader/execution/unpack2x16snorm.bin": "7208eb73", + "webgpu/shader/execution/unpack2x16unorm.bin": "20d9669b", + "webgpu/shader/execution/unpack4x8snorm.bin": "c77e1a72", + "webgpu/shader/execution/unpack4x8unorm.bin": "d80caf66", + 
"webgpu/shader/execution/unary/af_arithmetic.bin": "963c3185", + "webgpu/shader/execution/unary/af_assignment.bin": "9e8a3b3f", + "webgpu/shader/execution/unary/bool_conversion.bin": "eee7a40c", + "webgpu/shader/execution/unary/f16_arithmetic.bin": "aaea9f75", + "webgpu/shader/execution/unary/f16_conversion.bin": "5b26998a", + "webgpu/shader/execution/unary/f32_arithmetic.bin": "65dfc2ac", + "webgpu/shader/execution/unary/f32_conversion.bin": "cd874be3", + "webgpu/shader/execution/unary/i32_arithmetic.bin": "af4c0e43", + "webgpu/shader/execution/unary/i32_conversion.bin": "5b6e4d9", + "webgpu/shader/execution/unary/u32_conversion.bin": "229649a6", + "webgpu/shader/execution/unary/ai_assignment.bin": "8efcf261", + "webgpu/shader/execution/binary/ai_arithmetic.bin": "a57ee284", + "webgpu/shader/execution/unary/ai_arithmetic.bin": "948016b6", + "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "52c24212", + "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "256556e1", + "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "38085521", + "webgpu/shader/execution/derivatives.bin": "f38a38ff", + "webgpu/shader/execution/fwidth.bin": "4e9fc55d" } \ No newline at end of file diff --git a/src/resources/cache/webgpu/shader/execution/abs.bin b/src/resources/cache/webgpu/shader/execution/abs.bin index 4cba9b72dff4..373fef8f3b32 100644 Binary files a/src/resources/cache/webgpu/shader/execution/abs.bin and b/src/resources/cache/webgpu/shader/execution/abs.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acos.bin b/src/resources/cache/webgpu/shader/execution/acos.bin index 2ecaaa389a4e..5e311531fef4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acos.bin and b/src/resources/cache/webgpu/shader/execution/acos.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acosh.bin b/src/resources/cache/webgpu/shader/execution/acosh.bin index d48659f3c325..82a3857ebdc6 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acosh.bin and b/src/resources/cache/webgpu/shader/execution/acosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asin.bin b/src/resources/cache/webgpu/shader/execution/asin.bin index b199953eaf4b..388de445c06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asin.bin and b/src/resources/cache/webgpu/shader/execution/asin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asinh.bin b/src/resources/cache/webgpu/shader/execution/asinh.bin index b370c53b0179..120654f685c2 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asinh.bin and b/src/resources/cache/webgpu/shader/execution/asinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atan.bin b/src/resources/cache/webgpu/shader/execution/atan.bin index 6ab0ba106a9e..e81af87e15e0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atan.bin and b/src/resources/cache/webgpu/shader/execution/atan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atanh.bin b/src/resources/cache/webgpu/shader/execution/atanh.bin index e6a190b35df5..a7fee794094d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atanh.bin and b/src/resources/cache/webgpu/shader/execution/atanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/bitcast.bin b/src/resources/cache/webgpu/shader/execution/bitcast.bin index ead299d5e78f..e743a092553e 100644 Binary files a/src/resources/cache/webgpu/shader/execution/bitcast.bin 
and b/src/resources/cache/webgpu/shader/execution/bitcast.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/ceil.bin b/src/resources/cache/webgpu/shader/execution/ceil.bin index 9b93ed416f64..02cf23324cdf 100644 Binary files a/src/resources/cache/webgpu/shader/execution/ceil.bin and b/src/resources/cache/webgpu/shader/execution/ceil.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/cos.bin b/src/resources/cache/webgpu/shader/execution/cos.bin index 4e34eff3f1b1..a5d8573c6257 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cos.bin and b/src/resources/cache/webgpu/shader/execution/cos.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/cosh.bin b/src/resources/cache/webgpu/shader/execution/cosh.bin index 5b30d2786c5e..25e8750cc7fc 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cosh.bin and b/src/resources/cache/webgpu/shader/execution/cosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/degrees.bin b/src/resources/cache/webgpu/shader/execution/degrees.bin index 662558d78aca..eb514cb48b4d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/degrees.bin and b/src/resources/cache/webgpu/shader/execution/degrees.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/distance.bin b/src/resources/cache/webgpu/shader/execution/distance.bin index 23a4756a69eb..06d0d9a8fc2a 100644 Binary files a/src/resources/cache/webgpu/shader/execution/distance.bin and b/src/resources/cache/webgpu/shader/execution/distance.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/floor.bin b/src/resources/cache/webgpu/shader/execution/floor.bin index b5341907f8ef..a199d0db9ff0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/floor.bin and b/src/resources/cache/webgpu/shader/execution/floor.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/fract.bin b/src/resources/cache/webgpu/shader/execution/fract.bin index 7f09e8f60b23..bb80e873625f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/fract.bin and b/src/resources/cache/webgpu/shader/execution/fract.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/frexp.bin b/src/resources/cache/webgpu/shader/execution/frexp.bin index 6811dfa29507..8f87d16a9c95 100644 Binary files a/src/resources/cache/webgpu/shader/execution/frexp.bin and b/src/resources/cache/webgpu/shader/execution/frexp.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/length.bin b/src/resources/cache/webgpu/shader/execution/length.bin index 3644d9b683ac..db42153edff7 100644 Binary files a/src/resources/cache/webgpu/shader/execution/length.bin and b/src/resources/cache/webgpu/shader/execution/length.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log.bin b/src/resources/cache/webgpu/shader/execution/log.bin index ba591faad8a0..a5e62f8e02d8 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log.bin and b/src/resources/cache/webgpu/shader/execution/log.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log2.bin b/src/resources/cache/webgpu/shader/execution/log2.bin index 00641ce119cf..f19d77f41097 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log2.bin and b/src/resources/cache/webgpu/shader/execution/log2.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/modf.bin b/src/resources/cache/webgpu/shader/execution/modf.bin index 363cc161fd72..74259a23a6f8 100644 Binary files 
a/src/resources/cache/webgpu/shader/execution/modf.bin and b/src/resources/cache/webgpu/shader/execution/modf.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin index e95227d36e50..a7b99a0a6cfd 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin and b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pow.bin b/src/resources/cache/webgpu/shader/execution/pow.bin index 4f5faf3293fa..f66ec5ca2fbe 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pow.bin and b/src/resources/cache/webgpu/shader/execution/pow.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin index 9e4308d5cd30..d6d75befc06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin and b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/radians.bin b/src/resources/cache/webgpu/shader/execution/radians.bin index f5285d108778..731e6be24ed5 100644 Binary files a/src/resources/cache/webgpu/shader/execution/radians.bin and b/src/resources/cache/webgpu/shader/execution/radians.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/round.bin b/src/resources/cache/webgpu/shader/execution/round.bin index c3b30b68f0a1..5ccab9e661c4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/round.bin and b/src/resources/cache/webgpu/shader/execution/round.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/saturate.bin b/src/resources/cache/webgpu/shader/execution/saturate.bin index 2e1eb821a9e7..e7402f25af73 100644 Binary files a/src/resources/cache/webgpu/shader/execution/saturate.bin and b/src/resources/cache/webgpu/shader/execution/saturate.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sign.bin b/src/resources/cache/webgpu/shader/execution/sign.bin index 033f2e8158f6..576019c008ee 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sign.bin and b/src/resources/cache/webgpu/shader/execution/sign.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sin.bin b/src/resources/cache/webgpu/shader/execution/sin.bin index a2ca632008ff..bdbbfe2bd539 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sin.bin and b/src/resources/cache/webgpu/shader/execution/sin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sinh.bin b/src/resources/cache/webgpu/shader/execution/sinh.bin index 1176cd472bf2..b4b051a226ae 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sinh.bin and b/src/resources/cache/webgpu/shader/execution/sinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sqrt.bin b/src/resources/cache/webgpu/shader/execution/sqrt.bin index 6dd8088c0898..64a7db70d004 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sqrt.bin and b/src/resources/cache/webgpu/shader/execution/sqrt.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tan.bin b/src/resources/cache/webgpu/shader/execution/tan.bin index 572bee4df2a5..5af3e740d213 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tan.bin and b/src/resources/cache/webgpu/shader/execution/tan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tanh.bin 
b/src/resources/cache/webgpu/shader/execution/tanh.bin index a13028b165f0..9687ff00235c 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tanh.bin and b/src/resources/cache/webgpu/shader/execution/tanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/trunc.bin b/src/resources/cache/webgpu/shader/execution/trunc.bin index ba81e2ada427..e18bb52ed981 100644 Binary files a/src/resources/cache/webgpu/shader/execution/trunc.bin and b/src/resources/cache/webgpu/shader/execution/trunc.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin index 98a90ea45b9a..f28c275092f1 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin index 14299da76670..c47b3d0afcb0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin index ebc60029fa60..6e93bec14f76 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin index 66b2bc73f889..55e1f5ed945f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin index 04841df60785..49969e9221ad 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin index 277ffc4d76b7..8dfc4e268561 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin differ diff --git a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts index 3382dabc3720..c0613087a96a 100644 --- a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts +++ b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts @@ -3,7 +3,6 @@ ShaderModule CompilationInfo tests. `; import { makeTestGroup } from '../../../../common/framework/test_group.js'; -import { keysOf } from '../../../../common/util/data_tables.js'; import { assert } from '../../../../common/util/util.js'; import { GPUTest } from '../../../gpu_test.js'; @@ -79,66 +78,24 @@ const kInvalidShaderSources = [ const kAllShaderSources = [...kValidShaderSources, ...kInvalidShaderSources]; -// This is the source the sourcemap refers to. 
-const kOriginalSource = new Array(20) - .fill(0) - .map((_, i) => `original line ${i}`) - .join('\n'); - -const kSourceMaps: { [name: string]: undefined | object } = { - none: undefined, - empty: {}, - // A valid source map. It maps `unknown` on lines 4 and line 5 to - // `wasUnknown` from lines 20, 21 respectively - valid: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain', 'wasUnknown'], - mappings: ';kBAYkCA,OACd;SAElB;gBAKOC;gBACAA', - }, - // not a valid sourcemap - invalid: { - version: -123, - notAnything: {}, - }, - // The correct format but this data is for lines 11,12 even - // though the source only has 5 or 6 lines - nonMatching: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain'], - mappings: ';;;;;;;;;;kBAYkCA,OACd;SAElB', - }, -}; -const kSourceMapsKeys = keysOf(kSourceMaps); - g.test('getCompilationInfo_returns') .desc( ` Test that getCompilationInfo() can be called on any ShaderModule. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for both valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters. - Test that the compilation info for valid shader modules contains no errors. - Test that the compilation info for invalid shader modules contains at least one error.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); @@ -171,25 +128,15 @@ g.test('line_number_and_position') Test that line numbers reported by compilationInfo either point at an appropriate line and position or at 0:0, indicating an unknown position. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for invalid shader modules containing containing at least one error. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u - .combineWithParams(kInvalidShaderSources) - .beginSubcases() - .combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kInvalidShaderSources)) .fn(async t => { - const { _code, _errorLine, _errorLinePos, sourceMapName } = t.params; + const { _code, _errorLine, _errorLinePos } = t.params; const shaderModule = t.expectGPUError('validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }); const info = await shaderModule.getCompilationInfo(); @@ -232,24 +179,17 @@ g.test('offset_and_length') .desc( `Test that message offsets and lengths are valid and align with any reported lineNum and linePos. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. 
- - Test for valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); diff --git a/src/webgpu/api/operation/texture_view/write.spec.ts b/src/webgpu/api/operation/texture_view/write.spec.ts index 43b27f2874a5..aa41e7e176ea 100644 --- a/src/webgpu/api/operation/texture_view/write.spec.ts +++ b/src/webgpu/api/operation/texture_view/write.spec.ts @@ -36,6 +36,9 @@ const kTextureViewWriteMethods = [ ] as const; type TextureViewWriteMethod = (typeof kTextureViewWriteMethods)[number]; +const kTextureViewUsageMethods = ['inherit', 'minimal'] as const; +type TextureViewUsageMethod = (typeof kTextureViewUsageMethods)[number]; + // Src color values to read from a shader array. const kColorsFloat = [ { R: 1.0, G: 0.0, B: 0.0, A: 0.8 }, @@ -271,6 +274,22 @@ function writeTextureAndGetExpectedTexelView( return expectedTexelView; } +function getTextureViewUsage( + viewUsageMethod: TextureViewUsageMethod, + minimalUsageForTest: GPUTextureUsageFlags +) { + switch (viewUsageMethod) { + case 'inherit': + return 0; + + case 'minimal': + return minimalUsageForTest; + + default: + unreachable(); + } +} + g.test('format') .desc( `Views of every allowed format. @@ -280,6 +299,7 @@ Read values from color array in the shader, and write it to the texture view via - x= every texture format - x= sampleCount {1, 4} if valid - x= every possible view write method (see above) +- x= inherited or minimal texture view usage TODO: Test sampleCount > 1 for 'render-pass-store' after extending copySinglePixelTextureToBufferUsingComputePass to read multiple pixels from multisampled textures. [1] @@ -318,6 +338,7 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone } return true; }) + .combine('viewUsageMethod', kTextureViewUsageMethods) ) .beforeAllSubcases(t => { const { format, method } = t.params; @@ -332,13 +353,12 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone } }) .fn(t => { - const { format, method, sampleCount } = t.params; + const { format, method, sampleCount, viewUsageMethod } = t.params; - const usage = - GPUTextureUsage.COPY_SRC | - (method.includes('storage') - ? GPUTextureUsage.STORAGE_BINDING - : GPUTextureUsage.RENDER_ATTACHMENT); + const textureUsageForMethod = method.includes('storage') + ? 
GPUTextureUsage.STORAGE_BINDING + : GPUTextureUsage.RENDER_ATTACHMENT; + const usage = GPUTextureUsage.COPY_SRC | textureUsageForMethod; const texture = t.createTextureTracked({ format, @@ -347,7 +367,9 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone sampleCount, }); - const view = texture.createView(); + const view = texture.createView({ + usage: getTextureViewUsage(viewUsageMethod, textureUsageForMethod), + }); const expectedTexelView = writeTextureAndGetExpectedTexelView( t, method, diff --git a/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts new file mode 100644 index 000000000000..02ffd9e99db6 --- /dev/null +++ b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts @@ -0,0 +1,161 @@ +import { range } from '../../../../../common/util/util.js'; +import { align } from '../../../../util/math.js'; +import { kMaximumLimitBaseParams, makeLimitTestGroup } from '../limits/limit_utils.js'; + +function getPipelineDescriptorWithClipDistances( + device: GPUDevice, + interStageShaderVariables: number, + pointList: boolean, + clipDistances: number, + startLocation: number = 0 +): GPURenderPipelineDescriptor { + const vertexOutputVariables = + interStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxVertexOutputVariables = + device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + + const varyings = ` + ${range( + vertexOutputVariables, + i => `@location(${i + startLocation}) v4_${i + startLocation}: vec4f,` + ).join('\n')} + `; + + const code = ` + // test value : ${interStageShaderVariables} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${ + pointList ? ' + point-list' : '' + }${ + clipDistances > 0 + ? ` + ${align(clipDistances, 4) / 4} (clip_distances[${clipDistances}])` + : '' + } + // maxInterStageVariables: : ${maxVertexOutputVariables} + // num used inter stage variables : ${vertexOutputVariables} + // vertex output start location : ${startLocation} + + enable clip_distances; + + struct VSOut { + @builtin(position) p: vec4f, + ${varyings} + ${ + clipDistances > 0 + ? `@builtin(clip_distances) clipDistances: array,` + : '' + } + } + struct FSIn { + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + } + @vertex fn vs() -> VSOut { + var o: VSOut; + o.p = vec4f(0); + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); + return o; + } + `; + const module = device.createShaderModule({ code }); + const pipelineDescriptor: GPURenderPipelineDescriptor = { + layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }; + return pipelineDescriptor; +} + +const limit = 'maxInterStageShaderVariables'; +export const { g, description } = makeLimitTestGroup(limit); + +g.test('createRenderPipeline,at_over') + .desc(`Test using at and over ${limit} limit with clip_distances in createRenderPipeline(Async)`) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { limitTest, testValueName, async, pointList, clipDistances } = t.params; + await t.testDeviceWithRequestedMaximumLimits( + limitTest, + testValueName, + async ({ device, testValue, shouldError }) => { + const pipelineDescriptor = getPipelineDescriptorWithClipDistances( + device, + testValue, + pointList, + clipDistances + ); + + await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); + }, + undefined, + ['clip-distances'] + ); + }); + +g.test('createRenderPipeline,max_vertex_output_location') + .desc(`Test using clip_distances will limit the maximum value of vertex output location`) + .params(u => + u + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + .combine('startLocation', [0, 1, 2]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { pointList, clipDistances, startLocation } = t.params; + + const maxInterStageShaderVariables = t.adapter.limits.maxInterStageShaderVariables; + const deviceInTest = await t.requestDeviceTracked(t.adapter, { + requiredFeatures: ['clip-distances'], + requiredLimits: { + maxInterStageShaderVariables: t.adapter.limits.maxInterStageShaderVariables, + }, + }); + const pipelineDescriptor = getPipelineDescriptorWithClipDistances( + deviceInTest, + maxInterStageShaderVariables, + pointList, + clipDistances, + startLocation + ); + const vertexOutputVariables = + maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxLocationInTest = startLocation + vertexOutputVariables - 1; + const maxAllowedLocation = maxInterStageShaderVariables - 1 - align(clipDistances, 4) / 4; + const shouldError = maxLocationInTest > maxAllowedLocation; + + deviceInTest.pushErrorScope('validation'); + deviceInTest.createRenderPipeline(pipelineDescriptor); + const error = await deviceInTest.popErrorScope(); + t.expect(!!error === shouldError, `${error?.message || 'no error when one was expected'}`); + + deviceInTest.destroy(); + }); diff --git a/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts b/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts index 6941dac6b265..3def716d59de 100644 --- a/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts +++ b/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts @@ -5,6 +5,7 @@ Tests for capability checking for features enabling optional texture formats. 
import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { getGPU } from '../../../../../common/util/navigator_gpu.js'; import { assert } from '../../../../../common/util/util.js'; +import { kCanvasTextureFormats } from '../../../../capability_info.js'; import { kAllTextureFormats, kTextureFormatInfo } from '../../../../format_info.js'; import { kAllCanvasTypes, createCanvas } from '../../../../util/create_elements.js'; import { ValidationTest } from '../../validation_test.js'; @@ -161,15 +162,15 @@ g.test('canvas_configuration') usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.COPY_DST, }; - if (enable_required_feature) { - t.expectValidationError(() => { - ctx.configure(canvasConf); - }); - } else { - t.shouldThrow('TypeError', () => { - ctx.configure(canvasConf); - }); - } + const expectedError = + enable_required_feature && + (kCanvasTextureFormats as unknown as Array).includes(format) + ? false + : 'TypeError'; + + t.shouldThrow(expectedError, () => { + ctx.configure(canvasConf); + }); }); g.test('canvas_configuration_view_formats') diff --git a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts index ea44b11c9148..14f1642cea9f 100644 --- a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts +++ b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts @@ -535,11 +535,16 @@ export class LimitTestsImpl extends GPUTestBase { limitTest: MaximumLimitValueTest, testValueName: MaximumTestValue, fn: (inputs: MaximumLimitTestInputs) => void | Promise, - extraLimits?: LimitsRequest + extraLimits?: LimitsRequest, + extraFeatures: GPUFeatureName[] = [] ) { assert(!this._device); - const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit(limitTest, extraLimits); + const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit( + limitTest, + extraLimits, + extraFeatures + ); // If we request over the limit requestDevice will throw if (!deviceAndLimits) { return; diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts deleted file mode 100644 index 1963d9f28c6c..000000000000 --- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { range } from '../../../../../common/util/util.js'; - -import { kMaximumLimitBaseParams, LimitsRequest, makeLimitTestGroup } from './limit_utils.js'; - -function getPipelineDescriptor( - device: GPUDevice, - testValue: number, - pointList: boolean, - frontFacing: boolean, - sampleIndex: boolean, - sampleMaskIn: boolean, - sampleMaskOut: boolean -): { pipelineDescriptor: GPURenderPipelineDescriptor; code: string } { - const success = testValue <= device.limits.maxInterStageShaderComponents; - - const maxVertexOutputComponents = - device.limits.maxInterStageShaderComponents - (pointList ? 1 : 0); - const maxFragmentInputComponents = - device.limits.maxInterStageShaderComponents - - (frontFacing ? 1 : 0) - - (sampleIndex ? 1 : 0) - - (sampleMaskIn ? 
1 : 0); - const maxOutputComponents = Math.min(maxVertexOutputComponents, maxFragmentInputComponents); - const maxInterStageVariables = Math.floor(maxOutputComponents / 4); - const maxUserDefinedVertexComponents = Math.floor(maxVertexOutputComponents / 4) * 4; - const maxUserDefinedFragmentComponents = Math.floor(maxFragmentInputComponents / 4) * 4; - - const numInterStageVariables = success ? maxInterStageVariables : maxInterStageVariables + 1; - const numUserDefinedComponents = numInterStageVariables * 4; - - const varyings = ` - ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} - `; - - const code = ` - // test value : ${testValue} - // maxInterStageShaderComponents : ${device.limits.maxInterStageShaderComponents} - // num components in vertex shader : ${numUserDefinedComponents}${ - pointList ? ' + point-list' : '' - } - // num components in fragment shader : ${numUserDefinedComponents}${ - frontFacing ? ' + front-facing' : '' - }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''} - // maxUserDefinedVertexShaderOutputComponents : ${maxUserDefinedVertexComponents} - // maxUserDefinedFragmentShaderInputComponents : ${maxUserDefinedFragmentComponents} - // maxInterStageVariables: : ${maxInterStageVariables} - // num used inter stage variables : ${numInterStageVariables} - - struct VSOut { - @builtin(position) p: vec4f, - ${varyings} - } - struct FSIn { - ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} - ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} - ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} - ${varyings} - } - struct FSOut { - @location(0) color: vec4f, - ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} - } - @vertex fn vs() -> VSOut { - var o: VSOut; - o.p = vec4f(0); - return o; - } - @fragment fn fs(i: FSIn) -> FSOut { - var o: FSOut; - o.color = vec4f(0); - return o; - } - `; - const module = device.createShaderModule({ code }); - const pipelineDescriptor: GPURenderPipelineDescriptor = { - layout: 'auto', - primitive: { - topology: pointList ? 'point-list' : 'triangle-list', - }, - vertex: { - module, - entryPoint: 'vs', - }, - fragment: { - module, - entryPoint: 'fs', - targets: [ - { - format: 'rgba8unorm', - }, - ], - }, - }; - return { pipelineDescriptor, code }; -} - -const limit = 'maxInterStageShaderComponents'; -export const { g, description } = makeLimitTestGroup(limit); - -g.test('createRenderPipeline,at_over') - .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params( - kMaximumLimitBaseParams - .combine('async', [false, true]) - .combine('pointList', [false, true]) - .combine('frontFacing', [false, true]) - .combine('sampleIndex', [false, true]) - .combine('sampleMaskIn', [false, true]) - .combine('sampleMaskOut', [false, true]) - ) - .beforeAllSubcases(t => { - if (t.isCompatibility) { - t.skipIf( - t.params.sampleMaskIn || t.params.sampleMaskOut, - 'sample_mask not supported in compatibility mode' - ); - t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); - } - }) - .fn(async t => { - const { - limitTest, - testValueName, - async, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut, - } = t.params; - // Request the largest value of maxInterStageShaderVariables to allow the test using as many - // inter-stage shader components as possible without being limited by - // maxInterStageShaderVariables. 
- const extraLimits: LimitsRequest = { maxInterStageShaderVariables: 'adapterLimit' }; - await t.testDeviceWithRequestedMaximumLimits( - limitTest, - testValueName, - async ({ device, testValue, shouldError }) => { - const { pipelineDescriptor, code } = getPipelineDescriptor( - device, - testValue, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut - ); - - await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError, code); - }, - extraLimits - ); - }); diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts index e54b7f7df178..5298e8c21587 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts @@ -1,26 +1,86 @@ +import { range } from '../../../../../common/util/util.js'; + import { kMaximumLimitBaseParams, makeLimitTestGroup } from './limit_utils.js'; -function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderPipelineDescriptor { +function getPipelineDescriptor( + device: GPUDevice, + testValue: number, + pointList: boolean, + frontFacing: boolean, + sampleIndex: boolean, + sampleMaskIn: boolean, + sampleMaskOut: boolean +): GPURenderPipelineDescriptor { + const vertexOutputVariables = testValue - (pointList ? 1 : 0); + const fragmentInputVariables = testValue - (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0); + const numInterStageVariables = Math.min(vertexOutputVariables, fragmentInputVariables); + + const maxVertexOutputVariables = device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0); + const maxFragmentInputVariables = + device.limits.maxInterStageShaderVariables - + (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0); + const maxInterStageVariables = Math.min(maxVertexOutputVariables, maxFragmentInputVariables); + + const varyings = ` + ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} + `; + const code = ` + // test value : ${testValue} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${pointList ? ' + point-list' : ''} + // num variables in fragment shader : ${fragmentInputVariables}${ + frontFacing ? ' + front-facing' : '' + }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''} + // maxInterStageVariables: : ${maxInterStageVariables} + // num used inter stage variables : ${numInterStageVariables} + struct VSOut { @builtin(position) p: vec4f, - @location(${testValue}) v: f32, + ${varyings} + } + struct FSIn { + ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} + ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} + ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} } @vertex fn vs() -> VSOut { var o: VSOut; o.p = vec4f(0); - o.v = 1.0; + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); return o; } `; const module = device.createShaderModule({ code }); - return { + const pipelineDescriptor: GPURenderPipelineDescriptor = { layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, vertex: { module, entryPoint: 'vs', }, + fragment: { + module, + entryPoint: 'fs', + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, }; + return pipelineDescriptor; } const limit = 'maxInterStageShaderVariables'; @@ -28,15 +88,48 @@ export const { g, description } = makeLimitTestGroup(limit); g.test('createRenderPipeline,at_over') .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params(kMaximumLimitBaseParams.combine('async', [false, true])) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('frontFacing', [false, true]) + .combine('sampleIndex', [false, true]) + .combine('sampleMaskIn', [false, true]) + .combine('sampleMaskOut', [false, true]) + ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.sampleMaskIn || t.params.sampleMaskOut, + 'sample_mask not supported in compatibility mode' + ); + t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); + } + }) .fn(async t => { - const { limitTest, testValueName, async } = t.params; + const { + limitTest, + testValueName, + async, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut, + } = t.params; await t.testDeviceWithRequestedMaximumLimits( limitTest, testValueName, async ({ device, testValue, shouldError }) => { - const lastIndex = testValue - 1; - const pipelineDescriptor = getPipelineDescriptor(device, lastIndex); + const pipelineDescriptor = getPipelineDescriptor( + device, + testValue, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut + ); await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts index 9e5aaa144bfa..b37cc9230931 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, lastIndex: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts index 0af5724f2a2a..be9c7ffd7f7b 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts @@ -32,6 +32,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts index 9a4108cb0c08..02701de0d1e9 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP module, buffers, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git 
a/src/webgpu/api/validation/createView.spec.ts b/src/webgpu/api/validation/createView.spec.ts index 56a603b714e8..c3e56bb4f011 100644 --- a/src/webgpu/api/validation/createView.spec.ts +++ b/src/webgpu/api/validation/createView.spec.ts @@ -6,8 +6,10 @@ import { unreachable } from '../../../common/util/util.js'; import { kTextureAspects, kTextureDimensions, + kTextureUsages, kTextureViewDimensions, } from '../../capability_info.js'; +import { GPUConst } from '../../constants.js'; import { kTextureFormatInfo, kAllTextureFormats, @@ -339,3 +341,73 @@ g.test('texture_state') texture.createView(); }, state === 'invalid'); }); + +g.test('texture_view_usage') + .desc( + `Test texture view usage (single, combined, inherited) for every texture format and texture usage` + ) + .params(u => + u // + .combine('format', kAllTextureFormats) + .combine('textureUsage0', kTextureUsages) + .combine('textureUsage1', kTextureUsages) + .filter(({ format, textureUsage0, textureUsage1 }) => { + const info = kTextureFormatInfo[format]; + const textureUsage = textureUsage0 | textureUsage1; + + if ( + (textureUsage & GPUConst.TextureUsage.RENDER_ATTACHMENT) !== 0 && + info.color && + !info.colorRender + ) { + return false; + } + + return true; + }) + .beginSubcases() + .combine('textureViewUsage0', [0, ...kTextureUsages]) + .combine('textureViewUsage1', [0, ...kTextureUsages]) + ) + .beforeAllSubcases(t => { + const { format, textureUsage0, textureUsage1 } = t.params; + const info = kTextureFormatInfo[format]; + const textureUsage = textureUsage0 | textureUsage1; + t.skipIfTextureFormatNotSupported(format); + t.selectDeviceOrSkipTestCase(info.feature); + if (textureUsage & GPUTextureUsage.STORAGE_BINDING) { + t.skipIfTextureFormatNotUsableAsStorageTexture(format); + } + }) + .fn(t => { + const { format, textureUsage0, textureUsage1, textureViewUsage0, textureViewUsage1 } = t.params; + const info = kTextureFormatInfo[format]; + + const size = [info.blockWidth, info.blockHeight, 1]; + const dimension = '2d'; + const mipLevelCount = 1; + const usage = textureUsage0 | textureUsage1; + + const textureDescriptor: GPUTextureDescriptor = { + size, + mipLevelCount, + dimension, + format, + usage, + }; + + const texture = t.createTextureTracked(textureDescriptor); + + let success = true; + + const textureViewUsage = textureViewUsage0 | textureViewUsage1; + + // Texture view usage must be a subset of texture usage + if ((~usage & textureViewUsage) !== 0) success = false; + + t.expectValidationError(() => { + texture.createView({ + usage: textureViewUsage, + }); + }, !success); + }); diff --git a/src/webgpu/api/validation/layout_shader_compat.spec.ts b/src/webgpu/api/validation/layout_shader_compat.spec.ts index 2b5e609c55d6..5ee16510c77a 100644 --- a/src/webgpu/api/validation/layout_shader_compat.spec.ts +++ b/src/webgpu/api/validation/layout_shader_compat.spec.ts @@ -253,6 +253,7 @@ g.test('pipeline_layout_shader_exact_match') code: vertexShader, }), }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); break; } diff --git a/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts b/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts new file mode 100644 index 000000000000..ed387b5a8769 --- /dev/null +++ b/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts @@ -0,0 +1,47 @@ +export const description = ` +Tests for capabilities added by float32-blendable flag. 
+`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { ColorTextureFormat } from '../../../format_info.js'; + +import { CreateRenderPipelineValidationTest } from './common.js'; + +export const g = makeTestGroup(CreateRenderPipelineValidationTest); + +const kFloat32Formats: ColorTextureFormat[] = ['r32float', 'rg32float', 'rgba32float']; + +g.test('create_render_pipeline') + .desc( + ` +Tests that the float32-blendable feature is required to create a render +pipeline that uses blending with any float32-format attachment. +` + ) + .params(u => + u + .combine('isAsync', [false, true]) + .combine('enabled', [true, false] as const) + .beginSubcases() + .combine('hasBlend', [true, false] as const) + .combine('format', kFloat32Formats) + ) + .beforeAllSubcases(t => { + if (t.params.enabled) { + t.selectDeviceOrSkipTestCase('float32-blendable'); + } + }) + .fn(t => { + const { isAsync, enabled, hasBlend, format } = t.params; + + const descriptor = t.getDescriptor({ + targets: [ + { + format, + blend: hasBlend ? { color: {}, alpha: {} } : undefined, + }, + ], + }); + + t.doCreateRenderPipelineTest(isAsync, enabled || !hasBlend, descriptor); + }); diff --git a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts index 1a8dec37464f..a3af6d675ac3 100644 --- a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts @@ -273,39 +273,29 @@ g.test('max_shader_variable_location') t.doCreateRenderPipelineTest(isAsync, location < maxInterStageShaderVariables, descriptor); }); -g.test('max_components_count,output') +g.test('max_variables_count,output') .desc( - `Tests that validation should fail when scalar components of all user-defined outputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined outputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined output scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. 
- { numScalarDelta: 0, topology: 'triangle-list', _success: true }, - { numScalarDelta: 1, topology: 'triangle-list', _success: false }, - { numScalarDelta: 0, topology: 'point-list', _success: false }, - { numScalarDelta: -1, topology: 'point-list', _success: false }, - { numScalarDelta: -3, topology: 'point-list', _success: false }, - { numScalarDelta: -4, topology: 'point-list', _success: true }, + // Number of user-defined output variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, topology: 'triangle-list', _success: true }, + { numVariablesDelta: 1, topology: 'triangle-list', _success: false }, + { numVariablesDelta: 0, topology: 'point-list', _success: false }, + { numVariablesDelta: -1, topology: 'point-list', _success: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, topology, _success } = t.params; + const { isAsync, numVariablesDelta, topology, _success } = t.params; - const numScalarComponents = t.device.limits.maxInterStageShaderComponents + numScalarDelta; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - const descriptor = t.getDescriptorWithStates( t.getVertexStateWithOutputs(outputs), t.getFragmentStateWithInputs(inputs) @@ -315,42 +305,32 @@ g.test('max_components_count,output') t.doCreateRenderPipelineTest(isAsync, _success, descriptor); }); -g.test('max_components_count,input') +g.test('max_variables_count,input') .desc( - `Tests that validation should fail when scalar components of all user-defined inputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined inputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined input scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. - { numScalarDelta: 0, useExtraBuiltinInputs: false }, - { numScalarDelta: 1, useExtraBuiltinInputs: false }, - { numScalarDelta: 0, useExtraBuiltinInputs: true }, - { numScalarDelta: -3, useExtraBuiltinInputs: true }, - { numScalarDelta: -4, useExtraBuiltinInputs: true }, + // Number of user-defined input variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, useExtraBuiltinInputs: false }, + { numVariablesDelta: 1, useExtraBuiltinInputs: false }, + { numVariablesDelta: 0, useExtraBuiltinInputs: true }, + { numVariablesDelta: -1, useExtraBuiltinInputs: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, useExtraBuiltinInputs } = t.params; + const { isAsync, numVariablesDelta, useExtraBuiltinInputs } = t.params; - const numScalarComponents = - Math.floor((t.device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4; - const numExtraComponents = useExtraBuiltinInputs ? (t.isCompatibility ? 2 : 3) : 0; - const numUsedComponents = numScalarComponents + numExtraComponents; - const success = numUsedComponents <= t.device.limits.maxInterStageShaderComponents; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; + const numExtraVariables = useExtraBuiltinInputs ? 1 : 0; + const numUsedVariables = numVec4 + numExtraVariables; + const success = numUsedVariables <= t.device.limits.maxInterStageShaderVariables; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - if (useExtraBuiltinInputs) { inputs.push('@builtin(front_facing) front_facing_in: bool'); if (!t.isCompatibility) { diff --git a/src/webgpu/api/validation/render_pipeline/misc.spec.ts b/src/webgpu/api/validation/render_pipeline/misc.spec.ts index 861eb4d24c7f..d10c7ca99985 100644 --- a/src/webgpu/api/validation/render_pipeline/misc.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/misc.spec.ts @@ -36,7 +36,7 @@ g.test('no_attachment') g.test('vertex_state_only') .desc( `Tests creating vertex-state-only render pipeline. A vertex-only render pipeline has no fragment -state (and thus has no color state), and can be created with or without depth stencil state.` +state (and thus has no color state), and must have a depth-stencil state, since at least one attachment is required.` ) .params(u => u @@ -76,7 +76,7 @@ state (and thus has no color state), and can be created with or without depth st targets: hasColor ?
[{ format: 'rgba8unorm' }] : [], }); - t.doCreateRenderPipelineTest(isAsync, true, descriptor); + t.doCreateRenderPipelineTest(isAsync, depthStencilState !== undefined, descriptor); }); g.test('pipeline_layout,device_mismatch') diff --git a/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts b/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts index 2fbb52f36694..6b65a620fc11 100644 --- a/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts +++ b/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts @@ -180,11 +180,21 @@ g.test('subresources,color_attachment_and_bind_group') { bgLayer: 0, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 2 }, + { bgLayer: 0, bgLayerCount: kTextureLayers }, ]) .combine('bgUsage', kTextureBindingTypes) .unless(t => t.bgUsage !== 'sampled-texture' && t.bgLevelCount > 1) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf(t.params.bgLayer !== 0, 'view base array layer must equal 0 in compatibility mode'); + t.skipIf( + t.params.bgLayerCount !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { colorAttachmentLevel, @@ -288,6 +298,7 @@ g.test('subresources,depth_stencil_attachment_and_bind_group') { bgLayer: 0, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 2 }, + { bgLayer: 0, bgLayerCount: kTextureLayers }, ]) .beginSubcases() .combine('depthReadOnly', [true, false]) @@ -295,6 +306,15 @@ g.test('subresources,depth_stencil_attachment_and_bind_group') .combine('bgAspect', ['depth-only', 'stencil-only'] as const) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf(t.params.bgLayer !== 0, 'view base array layer must equal 0 in compatibility mode'); + t.skipIf( + t.params.bgLayerCount !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { dsLevel, @@ -411,6 +431,7 @@ g.test('subresources,multiple_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('bg1Levels', [ { base: 0, count: 1 }, @@ -421,6 +442,7 @@ g.test('subresources,multiple_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('bgUsage0', kTextureBindingTypes) .combine('bgUsage1', kTextureBindingTypes) @@ -432,6 +454,18 @@ g.test('subresources,multiple_bind_groups') .beginSubcases() .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.bg0Layers.base !== 0 || t.params.bg1Layers.base !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.bg0Layers.count !== kTextureLayers || t.params.bg1Layers.count !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { bg0Levels, bg0Layers, bg1Levels, bg1Layers, bgUsage0, bgUsage1, inSamePass } = t.params; @@ -524,6 +558,7 @@ g.test('subresources,depth_stencil_texture_in_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('view1Levels', [ { base: 0, count: 1 }, @@ -534,11 +569,25 @@ g.test('subresources,depth_stencil_texture_in_bind_groups') { base: 0, 
count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('aspect0', ['depth-only', 'stencil-only'] as const) .combine('aspect1', ['depth-only', 'stencil-only'] as const) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.view0Layers.base !== 0 || t.params.view1Layers.base !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.view0Layers.count !== kTextureLayers || + t.params.view1Layers.count !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { view0Levels, view0Layers, view1Levels, view1Layers, aspect0, aspect1, inSamePass } = t.params; diff --git a/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts b/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts index 05f048ac050c..a601fafec2fc 100644 --- a/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts +++ b/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts @@ -4,6 +4,7 @@ Texture Usages Validation Tests on All Kinds of WebGPU Subresource Usage Scopes. import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { unreachable } from '../../../../../common/util/util.js'; +import { kTextureUsages } from '../../../../capability_info.js'; import { ValidationTest } from '../../validation_test.js'; import { TextureBindingType, @@ -571,3 +572,79 @@ g.test('subresources,texture_usages_in_copy_and_render_pass') encoder.finish(); }, false); }); + +g.test('subresources,texture_view_usages') + .desc( + ` + Test that the usages of the texture view are used to validate compatibility in command encoding + instead of the usages of the base texture.` + ) + .params(u => + u + .combine('bindingType', ['color-attachment', ...kTextureBindingTypes] as const) + .combine('viewUsage', [0, ...kTextureUsages]) + ) + .fn(t => { + const { bindingType, viewUsage } = t.params; + + const texture = t.createTextureTracked({ + format: 'r32float', + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.TEXTURE_BINDING | + GPUTextureUsage.STORAGE_BINDING | + GPUTextureUsage.RENDER_ATTACHMENT, + size: [kTextureSize, kTextureSize, 1], + ...(t.isCompatibility && { + textureBindingViewDimension: '2d-array', + }), + }); + + switch (bindingType) { + case 'color-attachment': { + const encoder = t.device.createCommandEncoder(); + const renderPassEncoder = encoder.beginRenderPass({ + colorAttachments: [ + { view: texture.createView({ usage: viewUsage }), loadOp: 'load', storeOp: 'store' }, + ], + }); + renderPassEncoder.end(); + + const success = viewUsage === 0 || (viewUsage & GPUTextureUsage.RENDER_ATTACHMENT) !== 0; + + t.expectValidationError(() => { + encoder.finish(); + }, !success); + break; + } + case 'sampled-texture': + case 'readonly-storage-texture': + case 'writeonly-storage-texture': + case 'readwrite-storage-texture': + { + let success = true; + if (viewUsage !== 0) { + if (bindingType === 'sampled-texture') { + if ((viewUsage & GPUTextureUsage.TEXTURE_BINDING) === 0) success = false; + } else { + if ((viewUsage & GPUTextureUsage.STORAGE_BINDING) === 0) success = false; + } + } + + t.expectValidationError(() => { + t.createBindGroupForTest( + texture.createView({ + dimension: '2d-array', + usage: viewUsage, + }), + bindingType, + 'unfilterable-float' + ); + }, !success); + } + break; + default: + 
unreachable(); + } + }); diff --git a/src/webgpu/api/validation/shader_module/entry_point.spec.ts b/src/webgpu/api/validation/shader_module/entry_point.spec.ts index c956dc302144..67dbef1851e1 100644 --- a/src/webgpu/api/validation/shader_module/entry_point.spec.ts +++ b/src/webgpu/api/validation/shader_module/entry_point.spec.ts @@ -128,6 +128,7 @@ and check that the APIs only accept matching entryPoint. module: t.device.createShaderModule({ code }), entryPoint, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; let _success = true; if (shaderModuleStage !== 'vertex') { @@ -258,6 +259,7 @@ an undefined entryPoint is valid if there's an extra shader stage. }), entryPoint: undefined, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; const success = extraShaderModuleStage !== 'vertex'; diff --git a/src/webgpu/capability_info.ts b/src/webgpu/capability_info.ts index 24a103dfb4b1..5af80c45fa4c 100644 --- a/src/webgpu/capability_info.ts +++ b/src/webgpu/capability_info.ts @@ -730,7 +730,6 @@ const [kLimitInfoKeys, kLimitInfoDefaults, kLimitInfoData] = 'maxBufferSize': [ , 268435456, 268435456, kMaxUnsignedLongLongValue], 'maxVertexAttributes': [ , 16, 16, ], 'maxVertexBufferArrayStride': [ , 2048, 2048, ], - 'maxInterStageShaderComponents': [ , 64, 60, ], 'maxInterStageShaderVariables': [ , 16, 15, ], 'maxColorAttachments': [ , 8, 4, ], @@ -818,19 +817,22 @@ export const kFeatureNameInfo: { readonly [k in GPUFeatureName]: {}; } = /* prettier-ignore */ { - 'bgra8unorm-storage': {}, - 'depth-clip-control': {}, - 'depth32float-stencil8': {}, - 'texture-compression-bc': {}, - 'texture-compression-etc2': {}, - 'texture-compression-astc': {}, - 'timestamp-query': {}, - 'indirect-first-instance': {}, - 'shader-f16': {}, - 'rg11b10ufloat-renderable': {}, - 'float32-filterable': {}, - 'clip-distances': {}, - 'dual-source-blending': {}, + 'bgra8unorm-storage': {}, + 'depth-clip-control': {}, + 'depth32float-stencil8': {}, + 'texture-compression-bc': {}, + 'texture-compression-bc-sliced-3d': {}, + 'texture-compression-etc2': {}, + 'texture-compression-astc': {}, + 'texture-compression-astc-sliced-3d': {}, + 'timestamp-query': {}, + 'indirect-first-instance': {}, + 'shader-f16': {}, + 'rg11b10ufloat-renderable': {}, + 'float32-filterable': {}, + 'float32-blendable': {}, + 'clip-distances': {}, + 'dual-source-blending': {}, }; /** List of all GPUFeatureName values. 
*/ export const kFeatureNames = keysOf(kFeatureNameInfo); diff --git a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts index 805203870903..a74e8900904c 100644 --- a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts +++ b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts @@ -232,6 +232,7 @@ g.test('unsupportedStorageTextureFormats,renderPipeline') t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); @@ -267,7 +268,7 @@ g.test('textureLoad_with_depth_textures,computePipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'csWithoutDepthUsage'; t.doCreateComputePipelineTest(async, isValid, { layout: 'auto', compute: { module, entryPoint }, @@ -301,9 +302,10 @@ g.test('textureLoad_with_depth_textures,renderPipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'vsWithoutDepthUsage'; t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index 85fe0bdc6a8b..c455224da996 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -273,6 +273,7 @@ "webgpu:api,validation,buffer,mapping:unmap,state,mappedAtCreation:*": { "subcaseMS": 8.950 }, "webgpu:api,validation,buffer,mapping:unmap,state,mappingPending:*": { "subcaseMS": 22.951 }, "webgpu:api,validation,buffer,mapping:unmap,state,unmapped:*": { "subcaseMS": 74.200 }, + "webgpu:api,validation,capability_checks,features,clip_distances:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 }, "webgpu:api,validation,capability_checks,features,query_types:createQuerySet:*": { "subcaseMS": 10.451 }, "webgpu:api,validation,capability_checks,features,query_types:timestamp:*": { "subcaseMS": 1.200 }, "webgpu:api,validation,capability_checks,features,texture_formats:canvas_configuration:*": { "subcaseMS": 4.339 }, @@ -313,7 +314,6 @@ "webgpu:api,validation,capability_checks,limits,maxComputeWorkgroupsPerDimension:validate:*": { "subcaseMS": 138.900 }, "webgpu:api,validation,capability_checks,limits,maxDynamicStorageBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 15.680 }, "webgpu:api,validation,capability_checks,limits,maxDynamicUniformBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 10.268 }, - "webgpu:api,validation,capability_checks,limits,maxInterStageShaderComponents:createRenderPipeline,at_over:*": { "subcaseMS": 12.916 }, "webgpu:api,validation,capability_checks,limits,maxInterStageShaderVariables:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 }, "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createBindGroupLayout,at_over:*": { "subcaseMS": 47.857 }, "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createPipeline,at_over:*": { "subcaseMS": 45.611 }, @@ -728,9 +728,9 @@ "webgpu:api,validation,render_pipeline,inter_stage:location,mismatch:*": { "subcaseMS": 7.280 }, "webgpu:api,validation,render_pipeline,inter_stage:location,subset:*": { "subcaseMS": 1.250 }, 
"webgpu:api,validation,render_pipeline,inter_stage:location,superset:*": { "subcaseMS": 0.901 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,input:*": { "subcaseMS": 6.560 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:max_shader_variable_location:*": { "subcaseMS": 11.050 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,input:*": { "subcaseMS": 6.560 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:type:*": { "subcaseMS": 6.170 }, "webgpu:api,validation,render_pipeline,misc:basic:*": { "subcaseMS": 0.901 }, "webgpu:api,validation,render_pipeline,misc:external_texture:*": { "subcaseMS": 35.189 }, @@ -862,7 +862,6 @@ "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,used:*": { "subcaseMS": 0.000 }, "webgpu:compat,api,validation,render_pipeline,depth_stencil_state:depthBiasClamp:*": { "subcaseMS": 1.604 }, "webgpu:compat,api,validation,render_pipeline,fragment_state:colorState:*": { "subcaseMS": 32.604 }, - "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:interpolate:*": { "subcaseMS": 3.488 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_index:*": { "subcaseMS": 0.487 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_mask:*": { "subcaseMS": 0.408 }, @@ -870,6 +869,7 @@ "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:textureLoad_with_depth_textures,renderPipeline:*": { "subcaseMS": 1.259 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,computePipeline:*": { "subcaseMS": 1.206 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,renderPipeline:*": { "subcaseMS": 1.206 }, + "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,texture,createTexture:depthOrArrayLayers_incompatible_with_textureBindingViewDimension:*": { "subcaseMS": 12.712 }, "webgpu:compat,api,validation,texture,createTexture:format_reinterpretation:*": { "subcaseMS": 7.012 }, "webgpu:compat,api,validation,texture,createTexture:invalidTextureBindingViewDimension:*": { "subcaseMS": 6.022 }, @@ -1471,6 +1471,16 @@ "webgpu:shader,execution,expression,call,builtin,pow:abstract_float:*": { "subcaseMS": 30535.000 }, "webgpu:shader,execution,expression,call,builtin,pow:f16:*": { "subcaseMS": 816.063 }, "webgpu:shader,execution,expression,call,builtin,pow:f32:*": { "subcaseMS": 151.269 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:compute,all_active:*": { "subcaseMS": 734.565 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:compute,split:*": { "subcaseMS": 2001.058 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:data_types:*": { "subcaseMS": 29.144 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:fragment,all_active:*": { "subcaseMS": 317.251 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:fragment,split:*": { "subcaseMS": 0.206 }, + 
"webgpu:shader,execution,expression,call,builtin,quadSwap:compute,all_active:*": { "subcaseMS": 1658.818 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:compute,split:*": { "subcaseMS": 3082.458 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:data_types:*": { "subcaseMS": 1532.129 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:fragment,all_active:*": { "subcaseMS": 28.025 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:fragment,split:*": { "subcaseMS": 0.542 }, "webgpu:shader,execution,expression,call,builtin,quantizeToF16:f32:*": { "subcaseMS": 11.063 }, "webgpu:shader,execution,expression,call,builtin,radians:abstract_float:*": { "subcaseMS": 12268.988 }, "webgpu:shader,execution,expression,call,builtin,radians:f16:*": { "subcaseMS": 18.707 }, @@ -1525,15 +1535,34 @@ "webgpu:shader,execution,expression,call,builtin,step:f32:*": { "subcaseMS": 291.363 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:barrier:*": { "subcaseMS": 0.801 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:stage:*": { "subcaseMS": 2.402 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:compute,split:*": { "subcaseMS": 2853.671 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,all_active:*": { "subcaseMS": 0.172 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,split:*": { "subcaseMS": 0.327 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,all_active:*": { "subcaseMS": 0.227 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,split:*": { "subcaseMS": 0.309 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:compute,all_active:*": { "subcaseMS": 1251.161 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:compute,split:*": { "subcaseMS": 1743.045 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:data_types:*": { "subcaseMS": 5081.792 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:fragment,all_active:*": { "subcaseMS": 9079.446 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 }, - 
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:fragment:*": { "subcaseMS": 0.263 }, "webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 }, "webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 }, "webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 }, @@ -1554,8 +1583,8 @@ "webgpu:shader,execution,expression,call,builtin,textureGather:sampled_array_3d_coords:*": { "subcaseMS": 60.700 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_2d_coords:*": { "subcaseMS": 291.301 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_3d_coords:*": { "subcaseMS": 191.101 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_2d_coords:*": { "subcaseMS": 57.600 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_3d_coords:*": { "subcaseMS": 10.101 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_2d_coords:*": { "subcaseMS": 57.600 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_3d_coords:*": { "subcaseMS": 10.101 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:arrayed:*": { "subcaseMS": 30.501 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:depth:*": { "subcaseMS": 3.200 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:external:*": { "subcaseMS": 1.401 }, @@ -1579,7 +1608,6 @@ "webgpu:shader,execution,expression,call,builtin,textureSample:depth_array_2d_coords:*": { "subcaseMS": 92.601 }, "webgpu:shader,execution,expression,call,builtin,textureSample:depth_array_3d_coords:*": { "subcaseMS": 20.301 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_1d_coords:*": { "subcaseMS": 1.200 }, - "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_2d_coords,derivatives:*": { "subcaseMS": 0.091 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_2d_coords:*": { "subcaseMS": 12.500 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_3d_coords:*": { "subcaseMS": 36.002 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_array_2d_coords:*": { "subcaseMS": 92.500 }, @@ -1597,8 +1625,6 @@ "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:3d_coords:*": { "subcaseMS": 10.301 }, "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:arrayed_2d_coords:*": { "subcaseMS": 705.100 }, "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:arrayed_3d_coords:*": { "subcaseMS": 622.700 }, - "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:control_flow:*": { "subcaseMS": 2.202 }, - 
"webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:stage:*": { "subcaseMS": 7.901 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_2d_coords:*": { "subcaseMS": 82.401 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_3d_coords:*": { "subcaseMS": 309.101 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_array_2d_coords:*": { "subcaseMS": 352.900 }, @@ -1849,6 +1875,8 @@ "webgpu:shader,execution,padding:array_of_matCx3:*": { "subcaseMS": 8.650 }, "webgpu:shader,execution,padding:array_of_struct:*": { "subcaseMS": 5.801 }, "webgpu:shader,execution,padding:array_of_vec3:*": { "subcaseMS": 10.500 }, + "webgpu:shader,execution,padding:array_of_vec3h,elementwise:*": { "subcaseMS": 24.607 }, + "webgpu:shader,execution,padding:array_of_vec3h:*": { "subcaseMS": 26.941 }, "webgpu:shader,execution,padding:matCx3:*": { "subcaseMS": 10.050 }, "webgpu:shader,execution,padding:struct_explicit:*": { "subcaseMS": 12.000 }, "webgpu:shader,execution,padding:struct_implicit:*": { "subcaseMS": 33.201 }, @@ -2005,6 +2033,11 @@ "webgpu:shader,validation,expression,binary,div_rem:scalar_vector:*": { "subcaseMS": 743.721 }, "webgpu:shader,validation,expression,binary,div_rem:scalar_vector_out_of_range:*": { "subcaseMS": 650.727 }, "webgpu:shader,validation,expression,binary,parse:all:*": { "subcaseMS": 527.287 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_array_count_on_rhs:*": { "subcaseMS": 4.309 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_const:*": { "subcaseMS": 4.341 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_override:*": { "subcaseMS": 27.490 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_types:*": { "subcaseMS": 13.409 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:scalar_vector:*": { "subcaseMS": 397.769 }, "webgpu:shader,validation,expression,call,builtin,abs:parameters:*": { "subcaseMS": 10.133 }, "webgpu:shader,validation,expression,call,builtin,abs:values:*": { "subcaseMS": 0.391 }, "webgpu:shader,validation,expression,call,builtin,acos:integer_argument:*": { "subcaseMS": 1.512 }, @@ -2226,6 +2259,22 @@ "webgpu:shader,validation,expression,call,builtin,pow:invalid_argument:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:data_type:*": { "subcaseMS": 39.783 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:early_eval:*": { "subcaseMS": 63.825 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_constness:*": { "subcaseMS": 15.347 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_type:*": { "subcaseMS": 26.268 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:must_use:*": { "subcaseMS": 41.658 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups:*": { "subcaseMS": 42.565 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups_f16:*": { "subcaseMS": 44.998 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:return_type:*": { "subcaseMS": 363.607 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:stage:*": { "subcaseMS": 3.050 
}, + "webgpu:shader,validation,expression,call,builtin,quadSwap:data_type:*": { "subcaseMS": 89.379 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:early_eval:*": { "subcaseMS": 108.243 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:must_use:*": { "subcaseMS": 5.557 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups:*": { "subcaseMS": 113.624 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups_f16:*": { "subcaseMS": 12.712 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:return_type:*": { "subcaseMS": 1424.551 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:stage:*": { "subcaseMS": 7.664 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:values:*": { "subcaseMS": 1.000 }, @@ -2278,16 +2327,73 @@ "webgpu:shader,validation,expression,call,builtin,step:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:data_type:*": { "subcaseMS": 32.897 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:early_eval:*": { "subcaseMS": 101.800 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:invalid_types:*": { "subcaseMS": 95.889 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:must_use:*": { "subcaseMS": 62.933 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:return_type:*": { "subcaseMS": 363.546 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:stage:*": { "subcaseMS": 3.536 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:data_type:*": { "subcaseMS": 57.943 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:early_eval:*": { "subcaseMS": 173.714 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:must_use:*": { "subcaseMS": 4.592 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:requires_subgroups:*": { "subcaseMS": 73.866 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:return_type:*": { "subcaseMS": 39.388 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:stage:*": { "subcaseMS": 6.862 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:data_type:*": { "subcaseMS": 115.557 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:early_eval:*": { "subcaseMS": 52.992 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:must_use:*": { "subcaseMS": 39.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:requires_subgroups:*": { "subcaseMS": 36.819 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:return_type:*": { "subcaseMS": 22.381 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:stage:*": { "subcaseMS": 3.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:data_type:*": { "subcaseMS": 94.072 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:early_eval:*": { "subcaseMS": 569.598 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:must_use:*": { "subcaseMS": 6.172 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupBitwise:requires_subgroups:*": { "subcaseMS": 108.478 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:return_type:*": { "subcaseMS": 1430.736 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:stage:*": { "subcaseMS": 11.858 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:data_type:*": { "subcaseMS": 97.991 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:early_eval:*": { "subcaseMS": 1.254 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_constness:*": { "subcaseMS": 7.026 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_type:*": { "subcaseMS": 24.703 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups:*": { "subcaseMS": 47.231 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups_f16:*": { "subcaseMS": 38.503 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:data_type:*": { "subcaseMS": 32.168 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:early_eval:*": { "subcaseMS": 57.922 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:must_use:*": { "subcaseMS": 36.296 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups:*": { "subcaseMS": 42.522 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups_f16:*": { "subcaseMS": 47.111 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:return_type:*": { "subcaseMS": 402.558 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:stage:*": { "subcaseMS": 2.869 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:data_type:*": { "subcaseMS": 72.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:early_eval:*": { "subcaseMS": 56.115 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:must_use:*": { "subcaseMS": 32.820 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:requires_subgroups:*": { "subcaseMS": 35.595 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:return_type:*": { "subcaseMS": 22.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:stage:*": { "subcaseMS": 3.790 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:data_type:*": { "subcaseMS": 64.143 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:early_eval:*": { "subcaseMS": 551.671 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:must_use:*": { "subcaseMS": 4.403 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups:*": { "subcaseMS": 87.208 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups_f16:*": { "subcaseMS": 25.190 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:return_type:*": { "subcaseMS": 911.454 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:stage:*": { "subcaseMS": 6.395 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupMul:data_type:*": { "subcaseMS": 45.396 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:early_eval:*": { "subcaseMS": 56.571 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:invalid_types:*": { "subcaseMS": 91.040 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:must_use:*": { "subcaseMS": 39.041 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:return_type:*": { "subcaseMS": 549.172 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:stage:*": { "subcaseMS": 4.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:data_type:*": { "subcaseMS": 115.093 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:early_eval:*": { "subcaseMS": 110.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:must_use:*": { "subcaseMS": 7.628 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_type:*": { "subcaseMS": 88.305 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups:*": { "subcaseMS": 102.779 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups_f16:*": { "subcaseMS": 13.121 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:return_type:*": { "subcaseMS": 1930.309 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:stage:*": { "subcaseMS": 9.527 }, "webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 }, "webgpu:shader,validation,expression,call,builtin,tan:must_use:*": { "subcaseMS": 5.401 }, "webgpu:shader,validation,expression,call,builtin,tan:values:*": { "subcaseMS": 0.350 }, diff --git a/src/webgpu/print_environment.spec.ts b/src/webgpu/print_environment.spec.ts index 9790c770cefa..f3ca67d3a2c0 100644 --- a/src/webgpu/print_environment.spec.ts +++ b/src/webgpu/print_environment.spec.ts @@ -35,9 +35,7 @@ NOTE: If your test runtime elides logs when tests pass, you won't see the prints in the logs. On non-WPT runtimes, it will also print to the console with console.log. WPT disallows console.log and doesn't support logs on passing tests, so this does nothing on WPT.` ) - .fn(async t => { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. 
- const adapterInfo = t.adapter.info || (await t.adapter.requestAdapterInfo()); + .fn(t => { const isCompatibilityMode = (t.adapter as unknown as { isCompatibilityMode?: boolean }) .isCompatibilityMode; @@ -51,7 +49,7 @@ WPT disallows console.log and doesn't support logs on passing tests, so this doe adapter: { isFallbackAdapter: t.adapter.isFallbackAdapter, isCompatibilityMode, - info: adapterInfo, + info: t.adapter.info, features: Array.from(t.adapter.features), limits: t.adapter.limits, }, diff --git a/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts b/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts index f6fd05b46fcb..b8872eeab99f 100644 --- a/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts +++ b/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts @@ -11,7 +11,9 @@ import { abstractFloat, f32, vec, + Value, } from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; import { Case } from '../../case.js'; import { allInputSources, basicExpressionBuilder, run } from '../../expression.js'; @@ -198,3 +200,73 @@ g.test('abstract_float_element') cases ); }); + +g.test('non_const_index') + .specURL('https://www.w3.org/TR/WGSL/#matrix-access-expr') + .desc(`Test indexing of a matrix using non-const index`) + .params(u => u.combine('columns', [2, 3, 4] as const).combine('rows', [2, 3, 4] as const)) + .fn(t => { + const cols = t.params.columns; + const rows = t.params.rows; + const values = Array.from(Array(cols * rows).keys()); + const wgsl = ` +@group(0) @binding(0) var<storage, read_write> output : array<f32>; + +@compute @workgroup_size(${cols}, ${rows}) +fn main(@builtin(local_invocation_id) invocation_id : vec3<u32>) { + let m = mat${cols}x${rows}f(${values.join(', ')}); + output[invocation_id.x*${rows} + invocation_id.y] = m[invocation_id.x][invocation_id.y]; +} +`; + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ code: wgsl }), + entryPoint: 'main', + }, + }); + + const bufferSize = (arr: Value[]) => { + let offset = 0; + let alignment = 0; + for (const value of arr) { + alignment = Math.max(alignment, value.type.alignment); + offset = align(offset, value.type.alignment) + value.type.size; + } + return align(offset, alignment); + }; + + const toArray = (arr: Value[]) => { + const array = new Uint8Array(bufferSize(arr)); + let offset = 0; + for (const value of arr) { + offset = align(offset, value.type.alignment); + value.copyTo(array, offset); + offset += value.type.size; + } + return array; + }; + + const expected = values.map(i => Type['f32'].create(i)); + + const outputBuffer = t.createBufferTracked({ + size: bufferSize(expected), + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer: outputBuffer } }], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(1); + pass.end(); + t.queue.submit([encoder.finish()]); + + t.expectGPUBufferValuesEqual(outputBuffer, toArray(expected)); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts new file mode 100644 index 000000000000..29a3ec47c860 --- /dev/null +++
b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts @@ -0,0 +1,656 @@ +export const description = ` +Execution tests for quadBroadcast. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, unreachable } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { kBit } from '../../../../../util/constants.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; + +import { + kWGSizes, + kDataSentinel, + kPredicateCases, + runComputeTest, + SubgroupTest, + kFramebufferSizes, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +/** + * Generates scalar values for type + * + * Generates 4 32-bit values whose bit patterns represent + * interesting values of the data type. + * @param type The data type + */ +function generateScalarValues(type: Type): number[] { + const scalarTy = scalarTypeOf(type); + switch (scalarTy) { + case Type.u32: + return [kBit.u32.min, kBit.u32.max, 1111, 2222]; + case Type.i32: + return [ + kBit.i32.positive.min, + kBit.i32.positive.max, + kBit.i32.negative.min, + 0xffffffff, // -1 + ]; + case Type.f32: + return [ + kBit.f32.positive.zero, + kBit.f32.positive.nearest_max, + kBit.f32.negative.nearest_min, + 0xbf800000, // -1 + ]; + case Type.f16: + return [ + kBit.f16.positive.zero, + kBit.f16.positive.nearest_max, + kBit.f16.negative.nearest_min, + 0xbc00, // -1 + ]; + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return [0, 0, 0, 0]; +} + +/** + * Generates input bit patterns for the input type + * + * Generates 4 values of type in a Uint32Array. + * 16-bit types are appropriately packed. 
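+ * For example, four f16 scalars v0..v3 pack two per u32 as [v0 | (v1 << 16), v2 | (v3 << 16)],
+ * and each vec3<f16> input pads its fourth component with the sentinel value.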
+ * @param type The data type + */ +function generateTypedInputs(type: Type): Uint32Array { + const scalarValues = generateScalarValues(type); + let elements = 1; + if (type instanceof VectorType) { + elements = type.width; + } + if (type.requiresF16()) { + switch (elements) { + case 1: + return new Uint32Array([ + scalarValues[0] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[3] << 16), + ]); + case 2: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + case 3: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (kDataSentinel << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (kDataSentinel << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (kDataSentinel << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (kDataSentinel << 16), + ]); + case 4: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return new Uint32Array([0]); + } else { + const bound = elements === 3 ? 4 : elements; + const values: number[] = []; + for (let i = 0; i < 4; i++) { + for (let j = 0; j < bound; j++) { + if (j < elements) { + values.push(scalarValues[i]); + } else { + values.push(kDataSentinel); + } + } + } + return new Uint32Array(values); + } +} + +/** + * Checks results from data types test + * + * The output is expected to match the input values corresponding to the + * id being broadcast (assuming a linear mapping). + * @param metadata An unused parameter + * @param output The output data + * @param input The input data + * @param broadcast The id being broadcast + * @param type The data type being tested + */ +function checkDataTypes( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + broadcast: number, + type: Type +): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + const expectIdx = Math.floor(broadcast / 2); + const expectShift = broadcast % 2 === 1; + let expect = input[expectIdx]; + if (expectShift) { + expect >>= 16; + } + expect &= 0xffff; + + for (let i = 0; i < 4; i++) { + const index = Math.floor(i / 2); + const shift = i % 2 === 1; + let res = output[index]; + if (shift) { + res >>= 16; + } + res &= 0xffff; + if (res !== expect) { + return new Error(`${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 
4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const expect = input[broadcast * uints + j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${i * uints + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Test allowed data types') + .params(u => + u + .combine('type', keysOf(kTypes)) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = quadBroadcast(input[id], ${t.params.id}); +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, inputData, t.params.id, type); + } + ); + }); + +/** + * Checks quadBroadcast in compute shaders + * + * Assumes that quads are linear within a subgroup. 
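+ * That is, subgroup invocations 4k through 4k+3 are assumed to form quad k, so
+ * floor(id / 4) identifies the quad and id % 4 is the invocation's index within it.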
+ * + * @param metadata An array of integers divided as follows: + * * first half subgroup invocation ids + * * second half subgroup sizes + * @param output An array of integers divided as follows: + * * first half results of quad broadcast + * * second half generated unique subgroup ids + * @param broadcast The id being broadcast in the range [0, 3] + * @param filter A functor to filter active invocations + */ +function checkBroadcastCompute( + metadata: Uint32Array, + output: Uint32Array, + broadcast: number, + filter: (id: number, size: number) => boolean +): Error | undefined { + assert(broadcast === Math.trunc(broadcast)); + assert(broadcast >= 0 && broadcast <= 3); + + const bound = Math.floor(output.length / 2); + for (let i = 0; i < bound; i++) { + const subgroup_id = output[bound + i]; + const id = metadata[i]; + const size = metadata[bound + i]; + if (!filter(id, size)) { + if (output[i] !== kDataSentinel) { + return new Error(`Unexpected write for invocation ${i}`); + } + continue; + } + + const quad_id = Math.floor(id / 4); + const quad = [-1, -1, -1, -1]; + for (let j = 0; j < bound; j++) { + const other_id = metadata[j]; + const other_quad_id = Math.floor(other_id / 4); + const other_quad_index = other_id % 4; + const other_subgroup_id = output[bound + j]; + if (other_subgroup_id === subgroup_id && quad_id === other_quad_id) { + quad[other_quad_index] = j; + } + } + for (let j = 0; j < 4; j++) { + if (quad[j] === -1) { + return new Error(`Invocation ${i}: missing quad index ${j}`); + } + } + for (let j = 0; j < 4; j++) { + if (output[quad[j]] !== output[quad[broadcast]]) { + return new Error(`Incorrect result for quad: base invocation = ${ + quad[broadcast] + }, invocation = ${quad[j]} +- expected: ${output[quad[broadcast]]} +- got: ${output[quad[j]]}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc( + `Tests broadcast with all active invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. 
+ ` + ) + .params(u => + u + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + let b = quadBroadcast(lid, ${t.params.id}); + output.results[lid] = b; + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkBroadcastCompute(metadata, output, t.params.id, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc( + `Tests broadcast with predicated invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. +Quad operations require a fully active quad to operate correctly so several of the +predication filters are skipped. 
+ ` + ) + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .filter(t => { + return t.predicate === 'lower_half' || t.predicate === 'upper_half'; + }) + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + const testcase = kPredicateCases[t.params.predicate]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; + + if ${testcase.cond} { + let b = quadBroadcast(lid, ${t.params.id}); + output.results[lid] = b; + } +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkBroadcastCompute(metadata, output, t.params.id, testcase.filter); + } + ); + }); + +/** + * Checks results of quadBroadcast in fragment shaders. + * + * @param data The framebuffer output + * * component 0 is the broadcast of the integer x position + * * component 1 is the broadcast of the integer y position + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + * @param broadcast The quad id being broadcast + */ +function checkFragment( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number, + broadcast: number +): Error | undefined { + assert(broadcast === Math.trunc(broadcast)); + assert(broadcast >= 0 && broadcast <= 3); + + if (width < 3 || height < 3) { + return new Error( + `Insufficient framebuffer size [${width}w x ${height}h]. Minimum is [3w x 3h].` + ); + } + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const coordToIndex = (row: number, col: number) => { + return uintsPerRow * row + col * uintsPerTexel; + }; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const row_is_odd = row % 2 === 1; + const col_is_odd = col % 2 === 1; + + // Skip checking quads that extend into potential helper invocations. + const max_row = row_is_odd ? 
row : row + 1; + const max_col = col_is_odd ? col : col + 1; + if (max_row === height - 1 || max_col === width - 1) { + continue; + } + + let expect_row = row; + let expect_col = col; + switch (broadcast) { + case 0: + expect_row = row_is_odd ? row - 1 : row; + expect_col = col_is_odd ? col - 1 : col; + break; + case 1: + expect_row = row_is_odd ? row - 1 : row; + expect_col = col_is_odd ? col : col + 1; + break; + case 2: + expect_row = row_is_odd ? row : row + 1; + expect_col = col_is_odd ? col - 1 : col; + break; + case 3: + expect_row = row_is_odd ? row : row + 1; + expect_col = col_is_odd ? col : col + 1; + break; + } + + const row_broadcast = data[offset + 1]; + const col_broadcast = data[offset]; + if (expect_row !== row_broadcast) { + return new Error(`Row ${row}, col ${col}: incorrect row results: +- expected: ${expect_row} +- got: ${row_broadcast}`); + } + + if (expect_col !== col_broadcast) { + return new Error(`Row ${row}, col ${col}: incorrect col results: +- expected: ${expect_row} +- got: ${col_broadcast}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc(`Tests quadBroadcast in fragment shaders`) + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; // unused + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec4u { + // Force usage + _ = inputs[0]; + + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + + var x_broadcast = select(1001, u32(pos.x), in_range); + var y_broadcast = select(1001, u32(pos.y), in_range); + + x_broadcast = quadBroadcast(x_broadcast, ${t.params.id}); + y_broadcast = quadBroadcast(y_broadcast, ${t.params.id}); + + return vec4u(x_broadcast, y_broadcast, 0, 0); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + new Uint32Array([0]), // unused, + (data: Uint32Array) => { + return checkFragment( + data, + t.params.format, + t.params.size[0], + t.params.size[1], + t.params.id + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts new file mode 100644 index 000000000000..e6b6863a8e94 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts @@ -0,0 +1,666 @@ +export const description = ` +Execution tests for quadSwapX, quadSwapY, and quadSwapDiagnoal. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, unreachable } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { kBit } from '../../../../../util/constants.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; + +import { + kWGSizes, + kDataSentinel, + kPredicateCases, + runComputeTest, + SubgroupTest, + kFramebufferSizes, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +type SwapOp = 'quadSwapX' | 'quadSwapY' | 'quadSwapDiagonal'; + +const kOps: SwapOp[] = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal']; + +/** + * Generates scalar values for type + * + * Generates 4 32-bit values whose bit patterns represent + * interesting values of the data type. + * @param type The data type + */ +function generateScalarValues(type: Type): number[] { + const scalarTy = scalarTypeOf(type); + switch (scalarTy) { + case Type.u32: + return [kBit.u32.min, kBit.u32.max, 1111, 2222]; + case Type.i32: + return [ + kBit.i32.positive.min, + kBit.i32.positive.max, + kBit.i32.negative.min, + 0xffffffff, // -1 + ]; + case Type.f32: + return [ + kBit.f32.positive.zero, + kBit.f32.positive.nearest_max, + kBit.f32.negative.nearest_min, + 0xbf800000, // -1 + ]; + case Type.f16: + return [ + kBit.f16.positive.zero, + kBit.f16.positive.nearest_max, + kBit.f16.negative.nearest_min, + 0xbc00, // -1 + ]; + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return [0, 0, 0, 0]; +} + +/** + * Generates input bit patterns for the input type + * + * Generates 4 values of type in a Uint32Array. + * 16-bit types are appropriately packed. 
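+ * For instance, f16 scalars pack two values per u32, while vec3<f16> inputs pad the
+ * fourth component with the sentinel value.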
+ * @param type The data type + */ +function generateTypedInputs(type: Type): Uint32Array { + const scalarValues = generateScalarValues(type); + let elements = 1; + if (type instanceof VectorType) { + elements = type.width; + } + if (type.requiresF16()) { + switch (elements) { + case 1: + return new Uint32Array([ + scalarValues[0] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[3] << 16), + ]); + case 2: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + case 3: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (kDataSentinel << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (kDataSentinel << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (kDataSentinel << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (kDataSentinel << 16), + ]); + case 4: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return new Uint32Array([0]); + } else { + const bound = elements === 3 ? 4 : elements; + const values: number[] = []; + for (let i = 0; i < 4; i++) { + for (let j = 0; j < bound; j++) { + if (j < elements) { + values.push(scalarValues[i]); + } else { + values.push(kDataSentinel); + } + } + } + return new Uint32Array(values); + } +} + +/** + * Returns the swapped quad invocation id for the given op + * + * @param index The index in the range [0,3] + * @param op The swap + */ +function swapIndex(index: number, op: SwapOp): number { + assert(index === Math.trunc(index)); + assert(index >= 0 && index <= 3); + switch (op) { + case 'quadSwapX': + return index ^ 1; + case 'quadSwapY': + return index ^ 2; + case 'quadSwapDiagonal': + return index ^ 3; + } + unreachable(`Unhandled op ${op}`); +} + +/** + * Checks the results of data types test + * + * The outputs for a given index are expected to match the input values + * for the given swap. + * @param metadata An unused parameter + * @param output The output data + * @param input The input data + * @param op The type of swap + * @param type The data type + */ +function checkDataTypes( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + op: SwapOp, + type: Type +): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + for (let i = 0; i < 4; i++) { + const swapIdx = swapIndex(i, op); + + const expectIdx = Math.floor(swapIdx / 2); + const expectShift = swapIdx % 2 === 1; + let expect = input[expectIdx]; + if (expectShift) { + expect >>= 16; + } + expect &= 0xffff; + + const resIdx = Math.floor(i / 2); + const resShift = i % 2 === 1; + let res = output[resIdx]; + if (resShift) { + res >>= 16; + } + res &= 0xffff; + + if (res !== expect) { + return new Error(`${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 
4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const expect = input[swapIndex(i, op) * uints + j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${uints * i + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Test allowed data types') + .params(u => u.combine('type', keysOf(kTypes)).beginSubcases().combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = ${t.params.op}(input[id]); +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, inputData, t.params.op, type); + } + ); + }); + +/** + * Checks quad swaps in compute shaders + * + * Assumes that quads are linear within a subgroup. 
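+ * In other words, invocation id belongs to quad floor(id / 4) at quad index id % 4.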
+ * + * @param metadata An array of integers divided as follows: + * * first half subgroup invocation ids + * * second half subgroup sizes + * @param output An array of integers divided as follows: + * * first half results of quad broadcast + * * second half generated unique subgroup ids + * @param op The swap operation + * @param filter A functor to filter active invocations + */ +function checkSwapCompute( + metadata: Uint32Array, + output: Uint32Array, + op: SwapOp, + filter: (id: number, size: number) => boolean +): Error | undefined { + const bound = Math.floor(output.length / 2); + for (let i = 0; i < bound; i++) { + const subgroup_id = output[bound + i]; + const id = metadata[i]; + const size = metadata[bound + i]; + if (!filter(id, size)) { + if (output[i] !== kDataSentinel) { + return new Error(`Unexpected write for invocation ${i}`); + } + continue; + } + + const quad_id = Math.floor(id / 4); + const quad_index = id % 4; + let found = false; + for (let j = 0; j < bound; j++) { + const other_id = metadata[j]; + const other_quad_id = Math.floor(other_id / 4); + const other_quad_index = other_id % 4; + const other_subgroup_id = output[bound + j]; + if ( + subgroup_id === other_subgroup_id && + quad_id === other_quad_id && + quad_index === swapIndex(other_quad_index, op) + ) { + found = true; + if (output[i] !== j) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${j} +- got: ${output[i]}`); + } + break; + } + } + if (!found) { + return new Error(`Invocation ${i}: failed to find swapped result`); + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc( + `Tests swaps with all active invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. + ` + ) + .params(u => + u + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + let b = ${t.params.op}(lid); + output.results[lid] = b; + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkSwapCompute(metadata, output, t.params.op, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc( + `Tests swaps with all predicated invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. 
+Quad operations require a fully active quad to operate correctly so several of the +predication filters are skipped. + ` + ) + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .filter(t => { + return t.predicate === 'lower_half' || t.predicate === 'upper_half'; + }) + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + const testcase = kPredicateCases[t.params.predicate]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; + + if ${testcase.cond} { + let b = ${t.params.op}(lid); + output.results[lid] = b; + } +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkSwapCompute(metadata, output, t.params.op, testcase.filter); + } + ); + }); + +/** + * Checks results of quad swaps in fragment shaders. + * + * @param data The framebuffer output + * * component 0 is the broadcast of the integer x position + * * component 1 is the broadcast of the integer y position + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + * @param broadcast The quad id being broadcast + */ +function checkFragment( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number, + op: SwapOp +): Error | undefined { + if (width < 3 || height < 3) { + return new Error( + `Insufficient framebuffer size [${width}w x ${height}h]. Minimum is [3w x 3h].` + ); + } + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const coordToIndex = (row: number, col: number) => { + return uintsPerRow * row + col * uintsPerTexel; + }; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const row_is_odd = row % 2 === 1; + const col_is_odd = col % 2 === 1; + + // Skip checking quads that extend into potential helper invocations. + const max_row = row_is_odd ? 
row : row + 1; + const max_col = col_is_odd ? col : col + 1; + if (max_row === height - 1 || max_col === width - 1) { + continue; + } + + let expect_row = row; + let expect_col = col; + switch (op) { + case 'quadSwapX': + expect_col = col_is_odd ? col - 1 : col + 1; + break; + case 'quadSwapY': + expect_row = row_is_odd ? row - 1 : row + 1; + break; + case 'quadSwapDiagonal': + expect_row = row_is_odd ? row - 1 : row + 1; + expect_col = col_is_odd ? col - 1 : col + 1; + break; + } + + const row_output = data[offset + 1]; + const col_output = data[offset]; + if (expect_row !== row_output) { + return new Error(`Row ${row}, col ${col}: incorrect row results: +- expected: ${expect_row} +- got: ${row_output}`); + } + + if (expect_col !== col_output) { + return new Error(`Row ${row}, col ${col}: incorrect col results: +- expected: ${expect_row} +- got: ${col_output}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc(`Tests quad swaps in fragment shaders`) + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('op', kOps) + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; // unused + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec4u { + // Force usage + _ = inputs[0]; + + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + + var x_swap = select(1001, u32(pos.x), in_range); + var y_swap = select(1001, u32(pos.y), in_range); + + x_swap = ${t.params.op}(x_swap); + y_swap = ${t.params.op}(y_swap); + + return vec4u(x_swap, y_swap, 0, 0); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + new Uint32Array([0]), // unused, + (data: Uint32Array) => { + return checkFragment( + data, + t.params.format, + t.params.size[0], + t.params.size[1], + t.params.op + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts index 42d8d09ff569..f65bb951bf25 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts @@ -7,11 +7,16 @@ T is S or vecN Returns the smooth Hermite interpolation between 0 and 1. Component-wise when T is a vector. For scalar T, the result is t * t * (3.0 - 2.0 * t), where t = clamp((x - low) / (high - low), 0.0, 1.0). + +If low >= high: +* It is a shader-creation error if low and high are const-expressions. +* It is a pipeline-creation error if low and high are override-expressions. 
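+
+The test cases below filter const-evaluated inputs to low < high to avoid these errors.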
`; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { GPUTest } from '../../../../../gpu_test.js'; -import { Type } from '../../../../../util/conversion.js'; +import { ScalarValue, Type, Value } from '../../../../../util/conversion.js'; +import { Case } from '../../case.js'; import { allInputSources, onlyConstInputSource, run } from '../../expression.js'; import { abstractFloatBuiltin, builtin } from './builtin.js'; @@ -19,6 +24,13 @@ import { d } from './smoothstep.cache.js'; export const g = makeTestGroup(GPUTest); +// Returns true if `c` is valid for a const evaluation of smoothstep. +function validForConst(c: Case): boolean { + const low = (c.input as Value[])[0] as ScalarValue; + const high = (c.input as Value[])[1] as ScalarValue; + return low.value < high.value; +} + g.test('abstract_float') .specURL('https://www.w3.org/TR/WGSL/#float-builtin-functions') .desc(`abstract float tests`) @@ -28,7 +40,7 @@ g.test('abstract_float') .combine('vectorize', [undefined, 2, 3, 4] as const) ) .fn(async t => { - const cases = await d.get('abstract_const'); + const cases = (await d.get('abstract_const')).filter(c => validForConst(c)); await run( t, abstractFloatBuiltin('smoothstep'), @@ -47,7 +59,15 @@ g.test('f32') ) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 'f32_const' : 'f32_non_const'); - await run(t, builtin('smoothstep'), [Type.f32, Type.f32, Type.f32], Type.f32, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f32, Type.f32, Type.f32], + Type.f32, + t.params, + validCases + ); }); g.test('f16') @@ -61,5 +81,13 @@ g.test('f16') }) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 'f16_const' : 'f16_non_const'); - await run(t, builtin('smoothstep'), [Type.f16, Type.f16, Type.f16], Type.f16, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f16, Type.f16, Type.f16], + Type.f16, + t.params, + validCases + ); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..04792b2d98c1 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,364 @@ +export const description = ` +Execution tests for subgroupAdd, subgroupExclusiveAdd, and subgroupInclusiveAdd + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 0; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point addition. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. +These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupAdd', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.additionInterval : FP.f32.additionInterval + ); + }); + +/** + * Checks subgroup additions + * + * Expected results: + * - subgroupAdd: each invocation should have result equal to real subgroup size + * - subgroupExclusiveAdd: each invocation should have result equal to its subgroup invocation id + * - subgroupInclusiveAdd: each invocation should be equal to the result of subgroupExclusiveAdd plus the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of additions + * @param type The data type + * @param operation Type of addition + * @param expectedfillValue The original value used to fill the test array + */ +function checkAddition( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = operation === 'subgroupAdd' ? 
0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = metadata[i + expectedOffset]; + if (operation === 'subgroupInclusiveAdd') { + expected += expectedfillValue; + } + + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup addition for valid data types + +Tests a simple addition of all 1 values. +Reductions expect result to be equal to actual subgroup size. +Exclusice scans expect result to be equal subgroup invocation id. + +TODO: support vec3 types. + ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + const expectedFillValue = 1; + let fillValue = expectedFillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(1, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(1, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkAddition(metadata, output, type, t.params.operation, expectedFillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated additions + * + * Assumes the shader performs a predicated subgroup addition with the + * subgroup_invocation_id as the data. + * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of addition + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedAddition( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 0; + if (filter(id, size)) { + const bound = + operation === 'subgroupInclusiveAdd' ? id + 1 : operation === 'subgroupAdd' ? size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected += j; + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. 
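+  // Summing countOneBits over the four ballot words counts the active invocations.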
+ let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}(id); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedAddition(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts new file mode 100644 index 000000000000..0aa461c4a578 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAll. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + kFramebufferSizes, + runComputeTest, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 1s except for a zero randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAll operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. 
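+ * Invocations excluded by the filter must not have written a result; their outputs
+ * are expected to still hold the sentinel value.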
+ * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAll + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAll( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 1; + v &= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAll`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. 
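+  // subgroupBroadcastFirst(lid) is uniform within a subgroup and, since
+  // local_invocation_index is unique per invocation, differs between subgroups.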
+ outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. + outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAll results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAll( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. 
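+  // Map each generated subgroup id to the AND of its inputs over the checked region.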
+ const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 1; + // First index of input is an atomic counter. + v &= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAll in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(1u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAll(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAll( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts new file mode 100644 index 000000000000..5d5b9de11420 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 0s except for a one randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAny operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. + * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAny + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAny( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 0; + v |= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 
0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAny`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. + outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. 
+ outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAny results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAny( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 0; + // First index of input is an atomic counter. + v |= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAny in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. 
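+  // Anything outside the checked interior (including helper invocations, if they
+  // participate) is given 0, the identity for OR, so it cannot change the
+  // subgroupAny result of its subgroup.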
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(0u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAny(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAny( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts new file mode 100644 index 000000000000..c50fd08a1570 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -0,0 +1,562 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { + kConcreteSignedIntegerScalarsAndVectors, + kConcreteUnsignedIntegerScalarsAndVectors, + scalarTypeOf, + Type, + VectorType, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; +const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const; +const kTypes = objectsToRecord([ + ...kConcreteSignedIntegerScalarsAndVectors, + ...kConcreteUnsignedIntegerScalarsAndVectors, +]); + +/** + * Performs the appropriate bitwise operation on v1 and v2. + * + * @param op The subgroup operation + * @param v1 The first value + * @param v2 The second value + */ +function bitwise(op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', v1: number, v2: number): number { + switch (op) { + case 'subgroupAnd': + return v1 & v2; + case 'subgroupOr': + return v1 | v2; + case 'subgroupXor': + return v1 ^ v2; + } +} + +/** + * Returns the identity value for the subgroup operations + * + * @param op The subgroup operation + */ +function identity(op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor'): number { + switch (op) { + case 'subgroupAnd': + return ~0; + case 'subgroupOr': + case 'subgroupXor': + return 0; + } +} + +/** + * Checks the results for data type test + * + * The shader generate a unique subgroup id for each subgroup (avoiding 0). + * The check calculates the expected result for all subgroups and then compares that + * to the actual results. 
+ * @param metadata An array of integers divided as follows: + * * first half subgroup invocation id + * * second half unique subgroup id + * @param output An array of output values + * @param type The type being tested + * @param op The subgroup operation + * @param offset A constant offset added to subgroup invocation id to form the + * the input to the subgroup operation + */ +function checkDataTypes( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + offset: number +): undefined | Error { + const expected = new Map(); + for (let i = 0; i < Math.floor(metadata.length / 2); i++) { + const group_id = metadata[i + Math.floor(metadata.length / 2)]; + let expect = expected.get(group_id) ?? identity(op); + expect = bitwise(op, expect, i + offset); + expected.set(group_id, expect); + } + + let numEles = 1; + let stride = 1; + if (type instanceof VectorType) { + numEles = type.width; + stride = numEles === 3 ? 4 : numEles; + } + for (let inv = 0; inv < Math.floor(output.length / stride); inv++) { + const group_id = metadata[inv + Math.floor(metadata.length / 2)]; + const expect = expected.get(group_id) ?? 0; + for (let ele = 0; ele < numEles; ele++) { + const res = output[inv * stride + ele]; + if (res !== expect) { + return new Error(`Invocation ${inv}, component ${ele}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Tests allowed data types') + .params(u => + u + .combine('type', keysOf(kTypes)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const type = kTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width === 3 ? 4 : type.width; + } + + const scalarTy = scalarTypeOf(type); + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + id : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + + // Record subgroup invocation id for this invocation. + metadata.id[lid] = id; + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + outputs[lid] = ${t.params.op}(${type.toString()}(${scalarTy.toString()}(lid + inputs[0]))); +}`; + + const magicOffset = 0x7fff000f; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numEles, + new Uint32Array([magicOffset]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, type, t.params.op, magicOffset); + } + ); + }); + +/** + * Generates randomized input data + * + * Case 0: All 0s + * Case 1: All 0xffffs + * Case 2-9: All identity values except an inverted value randomly every 32 values. 
+ * All values capped to 0xffff + * Case 10+: Random values in the range [0, 2 ** 30] + * @param seed The PRNG seed + * @param num The number of values to generate + * @param identity The identity value for the operation + */ +function generateInputData(seed: number, num: number, identity: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 0xffff; + } else if (seed < 10) { + const bounded = x % bound; + let val = bounded === index ? ~identity : identity; + val &= 0xffff; + return val; + } + return prng.uniformInt(1 << 30); + }), + ]); +} + +/** + * Checks the result of compute tests + * + * Calculates the expected results for each subgroup and compares against + * the actual output. + * @param metadata An array divided as follows: + * * first half: subgroup invocation id in lower 16 bits + * subgroup size in upper 16 bits + * * second half: unique subgroup id + * @param output The outputs + * @param input The input data + * @param op The subgroup operation + * @param filter A predicate used to filter invocations. + */ +function checkBitwiseCompute( + metadata: Uint32Array, + output: Uint32Array, + input: Uint32Array, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + filter: (id: number, size: number) => boolean +): undefined | Error { + const expected = new Map(); + for (let i = 0; i < output.length; i++) { + const group_id = metadata[i + output.length]; + const combo = metadata[i]; + const id = combo & 0xffff; + const size = (combo >> 16) & 0xffff; + if (filter(id, size)) { + let expect = expected.get(group_id) ?? identity(op); + expect = bitwise(op, expect, input[i]); + expected.set(group_id, expect); + } + } + + for (let i = 0; i < output.length; i++) { + const group_id = metadata[i + output.length]; + const combo = metadata[i]; + const id = combo & 0xffff; + const size = (combo >> 16) & 0xffff; + const res = output[i]; + if (filter(id, size)) { + const expect = expected.get(group_id) ?? 0; + if (res !== expect) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${i}: unexpected write`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc('Test bitwise operations with randomized inputs') + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + id_and_size : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) sg_size : u32, +) { + + // Record both subgroup invocation id and subgroup size in the same u32. + // Subgroups sizes are in the range [4, 128] so both values fit. 
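+  // For example, id = 5 in a subgroup of size 64 is stored as (64 << 16) | 5 =
+  // 0x00400005; the checker unpacks it as combo & 0xffff and (combo >> 16) & 0xffff.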
+ metadata.id_and_size[lid] = id | (sg_size << 16); + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + outputs[lid] = ${t.params.op}(inputs[lid]); +}`; + + const inputData = generateInputData(t.params.case, wgThreads, identity(t.params.op)); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkBitwiseCompute( + metadata, + output, + inputData, + t.params.op, + (id: number, size: number) => { + return true; + } + ); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocations participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + id_and_size : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + + // Record both subgroup invocation id and subgroup size in the same u32. + // Subgroups sizes are in the range [4, 128] so both values fit. + metadata.id_and_size[lid] = id | (subgroupSize << 16); + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.op}(inputs[lid]); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads, identity(t.params.op)); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkBitwiseCompute(metadata, output, inputData, t.params.op, testcase.filter); + } + ); + }); + +/** + * Checks bitwise ops results from a fragment shader. + * + * Avoids the last row and column to skip potential helper invocations. + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param op The subgroup operation + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkBitwiseFragment( + data: Uint32Array, + input: Uint32Array, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 
1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? identity(op); + v = bitwise(op, v, input[row * width + col]); + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests bitwise operations in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('op', kOps) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs, identity(t.params.op)); + + const ident = identity(t.params.op) === 0 ? '0' : '0xffffffff'; + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + let subgroup_id = subgroupBroadcastFirst(linear + 1); + + // Filter out possible helper invocations. 
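+  // Out-of-range (and possible helper) invocations receive the operation's
+  // identity (0 for OR/XOR, 0xffffffff for AND), so they cannot affect their
+  // subgroup's result.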
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(${ident}, inputs[linear], in_range); + + let res = ${t.params.op}(input); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkBitwiseFragment( + data, + inputData, + t.params.op, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index b2fa9e46ec7a..75fe27e8cb5d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32, t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect)); }); -g.test('dynamically_uniform_id').unimplemented(); - g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..d45c023cd17a --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,387 @@ +export const description = ` +Execution tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 1; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point multiplication. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. 
+These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupMul', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.multiplicationInterval : FP.f32.multiplicationInterval + ); + }); + +/** + * Checks subgroup multiplications. + * + * Expected results: + * - subgroupMul: each invocation should have result equal to 2 to the real subgroup size + * - subgroupExclusiveMul: each invocation should have result equal to 2 to its subgroup invocation id + * - subgroupInclusiveMul: each invocation should be equal to subgroupExclusiveMul result multiplied by the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of multiplications + * @param type The data type + * @param operation Type of multiplication + * @param expectedFillValue The original value used to fill the test array + */ +function checkMultiplication( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = operation === 'subgroupMul' ? 0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = Math.pow(2, metadata[i + expectedOffset]); + if (operation === 'subgroupInclusiveMul') { + expected *= expectedfillValue; + } + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup multiplication for valid data types + +Tests a simple multiplication of all 2 values. +Reductions expect result to be equal to actual subgroup size. +Exclusice scans expect result to be equal subgroup invocation id. + +TODO: support vec3 types. + ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + // Workgroup sizes are kept < 16 to avoid overflows. + // Other tests cover that the full subgroup will contribute. 
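+      // The largest size below is 9 invocations (e.g. [3, 3, 1]), so with a fill
+      // value of 2 the largest possible product is 2^9 = 512, which is exactly
+      // representable in every tested type, including f16.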
+ .combine('wgSize', [ + [4, 1, 1], + [8, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 1, 4], + [1, 1, 8], + [2, 2, 2], + [4, 2, 1], + [4, 1, 2], + [2, 4, 1], + [2, 1, 4], + [1, 4, 2], + [1, 2, 4], + [3, 3, 1], + [3, 1, 3], + [1, 3, 3], + ] as const) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + + const expectedfillValue = 2; + let fillValue = expectedfillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(fillValue, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(fillValue, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkMultiplication(metadata, output, type, t.params.operation, expectedfillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated multiplications + * + * Assumes the shader performs a predicated subgroup multiplication with the + * subgroup_invocation_id as the data. 
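+ * (Concretely, the shader passes (id % 4) + 1 as the data; valueModFun below
+ * mirrors that mapping when computing the expected product.)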
+ * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of multiplication + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedMultiplication( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 1; + if (filter(id, size)) { + // This function replicates the behavior in the shader. + const valueModFun = function (id: number) { + return (id % 4) + 1; + }; + const bound = + operation === 'subgroupInclusiveMul' ? id + 1 : operation === 'subgroupMul' ? size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected *= valueModFun(j); + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}((id % 4) + 1); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedMultiplication(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts new file mode 100644 index 000000000000..9438c265d7df --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -0,0 +1,555 @@ +import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; +import { FPInterval } from '../../../../../util/floating_point.js'; +import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +export class SubgroupTest extends TextureTestMixin(GPUTest) {} + +export const kNumCases = 1000; +export const kStride = 128; + +export const kWGSizes = [ + [4, 1, 1], + [8, 1, 1], + [16, 1, 1], + [32, 1, 1], + [64, 1, 1], + [128, 1, 1], + [256, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 16, 1], + [1, 32, 1], + [1, 64, 1], + [1, 128, 1], + [1, 256, 1], + [1, 1, 4], + [1, 1, 8], + [1, 1, 16], + [1, 1, 32], + [1, 1, 64], + [3, 3, 3], + [4, 4, 4], + [16, 16, 1], + [16, 1, 16], + [1, 16, 16], + [15, 3, 3], + [3, 15, 3], + [3, 3, 15], +] as const; + +export const kPredicateCases = { + every_even: { + cond: `id % 2 == 0`, + filter: (id: number, size: number) => { + return id % 2 === 0; + }, + }, + every_odd: { + cond: `id % 2 == 1`, + filter: (id: number, size: number) => { + return id % 2 === 1; + }, + }, + lower_half: { + cond: `id < subgroupSize / 2`, + filter: (id: number, size: number) => { + return id < Math.floor(size / 2); + }, + }, + upper_half: { + cond: `id >= subgroupSize / 2`, + filter: (id: number, size: number) => { + return id >= Math.floor(size / 2); + }, + }, + first_two: { + cond: `id == 0 || id == 1`, + filter: (id: number) => { + return id === 0 || id === 1; + }, + }, +}; + +/** + * Check the accuracy of the reduction operation. + * + * @param metadata An array containing subgroup ids for each invocation + * @param output An array containing the results of the reduction for each invocation + * @param indices An array of two values containing the indices of the interesting values in the input + * @param values An array of two values containing the interesting values in the input + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +function checkAccuracy( + metadata: Uint32Array, + output: Float32Array | Float16Array, + indices: number[], + values: number[], + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +): undefined | Error { + const subgroupIdIdx1 = metadata[indices[0]]; + const subgroupIdIdx2 = metadata[indices[1]]; + for (let i = 0; i < output.length; i++) { + const subgroupId = metadata[i]; + + const v1 = subgroupId === subgroupIdIdx1 ? 
values[0] : identity; + const v2 = subgroupId === subgroupIdIdx2 ? values[1] : identity; + const interval = intervalGen(v1, v2); + if (!interval.contains(output[i])) { + return new Error(`Invocation ${i}, subgroup id ${subgroupId}: incorrect result +- interval: ${interval.toString()} +- output: ${output[i]}`); + } + } + + return undefined; +} + +/** + * Run a floating-point accuracy subgroup test. + * + * @param t The base test + * @param seed A seed for the PRNG + * @param wgSize An array for the workgroup size + * @param operation The subgroup operation + * @param type The type (f16 or f32) + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +export async function runAccuracyTest( + t: GPUTest, + seed: number, + wgSize: number[], + operation: string, + type: 'f16' | 'f32', + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +) { + assert(seed < kNumCases); + const prng = new PRNG(seed); + + // Compatibility mode has lower workgroup limits. + const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + // Bias half the cases to lower indices since most subgroup sizes are <= 64. + let indexLimit = kStride; + if (seed < kNumCases / 4) { + indexLimit = 16; + } else if (seed < kNumCases / 2) { + indexLimit = 64; + } + + // Ensure two distinct indices are picked. + const idx1 = prng.uniformInt(indexLimit); + let idx2 = prng.uniformInt(indexLimit - 1); + if (idx1 === idx2) { + idx2++; + } + assert(idx2 < indexLimit); + + // Select two random values. + const range = type === 'f16' ? sparseScalarF16Range() : sparseScalarF32Range(); + const numVals = range.length; + const val1 = range[prng.uniformInt(numVals)]; + const val2 = range[prng.uniformInt(numVals)]; + + const extraEnables = type === 'f16' ? `enable f16;\nenable subgroups_f16;` : ``; + const wgsl = ` +enable subgroups; +${extraEnables} + +@group(0) @binding(0) +var inputs : array<${type}>; + +@group(0) @binding(1) +var outputs : array<${type}>; + +struct Metadata { + subgroup_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, +) { + metadata.subgroup_id[lid] = subgroupBroadcast(lid, 0); + outputs[lid] = ${operation}(inputs[lid]); +}`; + + const inputData = + type === 'f16' + ? 
new Float16Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]) + : new Float32Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputBuffer = t.makeBufferWithContents( + new Float32Array([...iterRange(kStride, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = kStride; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + entryPoint: 'main', + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + method: 'copy', + }); + const metadata = metadataReadback.data; + + let output: Float16Array | Float32Array; + if (type === 'f16') { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float16Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } else { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float32Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } + + t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen)); +} + +// Repeat the bit pattern evey 16 bits for use with 16-bit types. +export const kDataSentinel = 999 | (999 << 16); + +/** + * Runs compute shader subgroup test + * + * The test makes the following assumptions: + * * group(0) binding(0) is a storage buffer for input data + * * group(0) binding(1) is an output storage buffer for outputUintsPerElement * wgSize uints + * * group(0) binding(2) is an output storage buffer for 2 * wgSize uints + * + * @param t The base test + * @param wgsl The shader code + * @param outputUintsPerElement number of uints output per invocation + * @param inputData the input data + * @param checkFunction a functor that takes the output storage buffer data to check result validity + */ +export async function runComputeTest( + t: GPUTest, + wgsl: string, + wgSize: number[], + outputUintsPerElement: number, + inputData: Uint32Array, + checkFunction: (metadata: Uint32Array, output: Uint32Array) => Error | undefined +) { + // Compatibility mode has lower workgroup limits. 
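+  // Skip (rather than fail) workgroup sizes that exceed this device's limits.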
+ const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputUints = outputUintsPerElement * wgThreads; + const outputBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = 2 * wgThreads; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + method: 'copy', + }); + const metadata = metadataReadback.data; + + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: outputUints, + method: 'copy', + }); + const output = outputReadback.data; + + t.expectOK(checkFunction(metadata, output)); +} + +// Minimum size is [3, 3]. +export const kFramebufferSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 3], + [3, 111], + [35, 3], + [3, 35], + [53, 13], + [13, 53], + [3, 3], +] as const; + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen triangle. + * Tests should avoid checking the last row or column to avoid helper + * invocations. Underlying APIs do not consistently guarantee whether + * helper invocations participate in subgroup operations. 
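+ * Both framebuffer dimensions must be at least 3 (asserted below; see
+ * kFramebufferSizes), leaving at least a 2x2 interior to check after the last
+ * row and column are skipped.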
+ * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is input data + * @param width The framebuffer width + * @param height The framebuffer height + * @param inputData The input data + * @param checker A functor to check the framebuffer values + */ +export async function runFragmentTest( + t: SubgroupTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + inputData: Uint32Array | Float32Array | Float16Array, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-2, 4), vec2(-2, -4), vec2(2, 0), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + assert(width >= 3, 'Minimum width is 3'); + assert(height >= 3, 'Minimum height is 3'); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + // 256 minimum arises from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); + + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3); + pass.end(); + t.queue.submit([encoder.finish()]); + + const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(copyBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); +} diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts index 40b331efaba9..d2ba15adb969 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts @@ -1,6 +1,8 @@ export const description = ` Execution tests for the 'textureGather' builtin function +- TODO: Test un-encodable formats. 
+ A texture gather operation reads from a 2D, 2D array, cube, or cube array texture, computing a four-component vector as follows: * Find the four texels that would be used in a sampling operation with linear filtering, from mip level 0: - Use the specified coordinate, array index (when present), and offset (when present). @@ -23,11 +25,38 @@ A texture gather operation reads from a 2D, 2D array, cube, or cube array textur `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kCompressedTextureFormats, + kDepthStencilFormats, + kEncodableTextureFormats, +} from '../../../../../format_info.js'; + +import { + appendComponentTypeForFormatToTextureType, + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + isFillable, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -55,22 +84,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, samplePoints, stage, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
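The footprint selection described at the top of this file can be sketched in isolation (illustrative only; it ignores address modes, cube faces, levels other than 0, and the component ordering of the result, and assumes texel centers at integer + 0.5):

// The four texels that linear filtering would blend at mip level 0 for normalized (u, v).
function gatherFootprint(u: number, v: number, width: number, height: number): number[][] {
  const x0 = Math.floor(u * width - 0.5);
  const y0 = Math.floor(v * height - 0.5);
  // Texel coordinates before any address-mode wrapping or clamping is applied.
  return [
    [x0, y0],     [x0 + 1, y0],
    [x0, y0 + 1], [x0 + 1, y0 + 1],
  ];
}
console.log(gatherFootprint(0.5, 0.5, 8, 8)); // [[3,3],[4,3],[3,4],[4,4]]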
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + component: true, + hashInputs: [stage, format, C, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, component, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + component, + componentType: C === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 T: i32, u32, f32 fn textureGather(component: C, t: texture_cube, s: sampler, coords: vec3) -> vec4 @@ -85,15 +181,86 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, 
+ textureBuiltin: 'textureGather', + hashInputs: [stage, format, C, samplePoints, mode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube', format); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -122,17 +289,90 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, stage, samplePoints, C, A, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + component: true, + hashInputs: [stage, format, samplePoints, C, A, modeU, modeV, minFilter, offset], + }).map(({ coords, component, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -140,8 +380,9 @@ g.test('sampled_array_3d_coords') ` C: i32, u32 T: i32, u32, f32 +A: i32, u32 -fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * component: @@ -154,17 +395,90 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('C', ['i32', 'u32'] as const) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, A, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, C, samplePoints, mode, minFilter], + }).map(({ coords, component, arrayIndex }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube_array', format); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -185,13 +499,79 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -205,21 +585,90 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and 
depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C) -> vec4 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -234,23 +683,97 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
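+      // depth24plus, depth24plus-stencil8, and depth32float-stencil8 have an
+      // implementation-defined memory representation, so the CTS texel utilities cannot
+      // yet encode expected values for them; that is presumably why the
+      // isEncodableTextureFormat filter below excludes them.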
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * t: The depth texture to read from @@ -259,12 +782,84 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
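+      // A cube-array texture is allocated with depthOrArrayLayers = 6 * (number of cubes),
+      // so the tests below derive the valid array_index range as depthOrArrayLayers / 6.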
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, arrayIndex }) => { + return { + builtin: 'textureGather', + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts index c743883ce849..f86a152c19bc 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts @@ -17,20 +17,42 @@ A texture gather compare operation performs a depth comparison on four texels in `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('array_2d_coords') 
.specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> vec4 -fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -46,24 +68,110 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { + format, + stage, + samplePoints, + A, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> vec4 Parameters: * t: The depth texture to read from @@ -73,17 +181,94 @@ Parameters: * depth_ref: The reference value to compare the sampled depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; -g.test('sampled_array_2d_coords') + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGatherCompare', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, depthRef, arrayIndex }) => { + return { + builtin: 'textureGatherCompare', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); + +g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -103,16 +288,85 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('C', ['i32', 'u32'] as const) + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, C, stage, samplePoints, mode, compare, filt: minFilter, offset } = t.params; + + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + offset, + depthRef: true, + hashInputs: [stage, format, C, samplePoints, mode, minFilter, compare, offset], + }).map(({ coords, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); -g.test('sampled_array_3d_coords') +g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -125,10 +379,82 @@ Parameters: * depth_ref: The reference value to compare the sampled depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - 
.combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + depthRef: true, + textureBuiltin: 'textureGatherCompare', + hashInputs: [stage, format, samplePoints, mode, minFilter, compare], + }).map(({ coords, depthRef }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts index 879817ec8ca3..689df4feb084 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts @@ -50,6 +50,7 @@ import { vec2, vec3, kSamplePointMethods, + kShortShaderStages, generateTextureBuiltinInputs1D, generateTextureBuiltinInputs2D, generateTextureBuiltinInputs3D, @@ -90,6 +91,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => textureDimensionAndFormatCompatible('1d', t.format)) // 1d textures can't have a height !== 1 @@ -105,7 +107,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, C, L, samplePoints } = t.params; + const { format, stage, C, L, samplePoints } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
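Several of the textureLoad tests below size their mip chains with maxMipLevelCount({ size }), which presumably computes the standard full-chain length from the largest dimension. A standalone sketch of that formula:

const fullMipLevelCount = (size: number[]): number =>
  1 + Math.floor(Math.log2(Math.max(...size)));
console.log(fullMipLevelCount([8, 8]));   // 4  (8x8 -> 4x4 -> 2x2 -> 1x1)
console.log(fullMipLevelCount([16, 8]));  // 5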
const [width] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -123,7 +125,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }, i) => { return { builtin: 'textureLoad', @@ -137,14 +139,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_1d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -166,9 +178,9 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) - // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter. - .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format)) + .filter(t => !isCompressedFloatTextureFormat(t.format)) .beginSubcases() .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) @@ -180,7 +192,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -188,10 +200,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - (canUseAsRenderTarget(format) ? 
GPUTextureUsage.RENDER_ATTACHMENT : 0), + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -199,7 +208,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -213,14 +222,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_2d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -241,6 +260,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => textureDimensionAndFormatCompatible('3d', t.format)) .beginSubcases() @@ -254,7 +274,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -272,7 +292,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -286,14 +306,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_3d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -316,6 +346,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('texture_type', [ 'texture_multisampled_2d', 'texture_depth_multisampled_2d', @@ -340,7 +371,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { texture_type, format, samplePoints, C, S } = t.params; + const { texture_type, format, stage, samplePoints, C, S } = t.params; const sampleCount = 4; const descriptor: GPUTextureDescriptor = { @@ -358,7 +389,7 @@ Parameters: method: samplePoints, descriptor, sampleIndex: { num: texture.sampleCount, type: S }, - hashInputs: [format, samplePoints, C, S], + hashInputs: [stage, format, samplePoints, C, S], }).map(({ coords, sampleIndex }) => { return { builtin: 'textureLoad', @@ -372,14 +403,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType(texture_type, texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await 
doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -400,6 +441,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) @@ -414,7 +456,7 @@ Parameters: t.skipIfTextureLoadNotSupportedForTextureType('texture_depth_2d'); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -422,10 +464,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -434,7 +473,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -447,14 +486,24 @@ Parameters: const textureType = 'texture_depth_2d'; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -472,14 +521,19 @@ Parameters: * coords: The 0-based texel coordinate. 
` ) - .paramsSubcasesOnly(u => + .params(u => u + .combine('stage', kShortShaderStages) + .beginSubcases() .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) .combine('L', ['i32', 'u32'] as const) ) + .beforeAllSubcases(t => + t.skipIf(typeof VideoFrame === 'undefined', 'VideoFrames are not supported') + ) .fn(async t => { - const { samplePoints, C, L } = t.params; + const { stage, samplePoints, C, L } = t.params; const size = [8, 8, 1]; @@ -490,6 +544,7 @@ Parameters: size, usage: GPUTextureUsage.COPY_DST, }; + const { texels, videoFrame } = createVideoFrameWithRandomDataAndGetTexels(descriptor.size); const texture = t.device.importExternalTexture({ source: videoFrame }); @@ -508,14 +563,23 @@ Parameters: const textureType = 'texture_external'; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage ); t.expectOK(res); videoFrame.close(); @@ -539,6 +603,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter. .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format)) @@ -562,7 +627,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { texture_type, format, samplePoints, C, A, L } = t.params; + const { texture_type, format, stage, samplePoints, C, A, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -583,7 +648,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, samplePoints, C, L, A], + hashInputs: [stage, format, samplePoints, C, L, A], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureLoad', @@ -598,14 +663,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType(texture_type, texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -625,6 +700,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -639,7 +715,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. 
const [width] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -655,7 +731,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs1D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -666,14 +742,24 @@ Parameters: const textureType = `texture_storage_1d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -693,6 +779,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -707,7 +794,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 3, format }); @@ -721,7 +808,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -732,14 +819,24 @@ Parameters: const textureType = `texture_storage_2d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -761,6 +858,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -776,7 +874,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C, A } = t.params; + const { format, stage, samplePoints, C, A } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. 
const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -791,7 +889,7 @@ Parameters: method: samplePoints, descriptor, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, samplePoints, C, A], + hashInputs: [stage, format, samplePoints, C, A], }).map(({ coords, arrayIndex }) => { return { builtin: 'textureLoad', @@ -806,14 +904,24 @@ Parameters: dimension: '2d-array', }; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -833,6 +941,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -847,7 +956,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -862,7 +971,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs3D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -873,14 +982,24 @@ Parameters: const textureType = `texture_storage_3d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts index ca7ae3d0655c..500376321444 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts @@ -51,13 +51,13 @@ Parameters .params(u => u .combine('texture_type', ['texture_2d_array', 'texture_cube_array'] as const) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('sampled_type', ['f32', 'i32', 'u32'] as const) - .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 'compatibility mode does not support partial layer views' ); t.skipIf( @@ -110,12 +110,11 @@ Parameters .params(u => u .combine('texture_type', ['texture_depth_2d_array', 'texture_depth_cube_array'] as const) - .beginSubcases() .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 'compatibility mode does not support partial layer views' ); t.skipIf( @@ 
-184,14 +183,20 @@ Parameters .params(u => u .combineWithParams(TexelFormats) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('access_mode', ['read', 'write', 'read_write'] as const) .filter( t => t.access_mode !== 'read_write' || kTextureFormatInfo[t.format].color?.readWriteStorage ) - .combine('view_type', ['full', 'partial'] as const) ) - .beforeAllSubcases(t => t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format)) + .beforeAllSubcases(t => { + t.skipIf( + t.isCompatibility && t.params.view_type === 'partial', + 'compatibility mode does not support partial layer views' + ); + t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format); + }) .fn(t => { const { format, access_mode, view_type } = t.params; diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts index 5610701601cb..471a462504d4 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts @@ -88,6 +88,7 @@ Parameters const texture = t.createTextureTracked({ format, dimension, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, @@ -157,6 +158,7 @@ Parameters const texture = t.createTextureTracked({ format: 'depth32float', dimension, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts index e1aa3f67328c..b469cc7f01f4 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts @@ -1,14 +1,20 @@ export const description = ` Samples a texture. +- TODO: test cube maps with more than 1 mip level. +- TODO: test un-encodable formats. 
+ note: uniformity validation is covered in src/webgpu/shader/validation/uniformity/uniformity.spec.ts `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { - isCompressedTextureFormat, + isDepthTextureFormat, + isEncodableTextureFormat, kCompressedTextureFormats, + kDepthStencilFormats, kEncodableTextureFormats, + textureDimensionAndFormatCompatible, } from '../../../../../format_info.js'; import { TextureTestMixin } from '../../../../../gpu_test.js'; @@ -16,10 +22,11 @@ import { vec2, vec3, TextureCall, - putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer, generateTextureBuiltinInputs2D, generateTextureBuiltinInputs3D, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, doTextureCalls, checkCallResults, createTextureWithRandomDataAndGetTexels, @@ -29,11 +36,13 @@ import { chooseTextureSize, isPotentiallyFilterableAndFillable, skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, - getDepthOrArrayLayersForViewDimension, getTextureTypeForTextureViewDimension, WGSLTextureSampleTest, + isSupportedViewFormatCombo, + vec1, + generateTextureBuiltinInputs1D, + skipIfNeedsFilteringAndIsUnfilterable, } from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; @@ -49,164 +58,171 @@ Parameters: * t The sampled, depth, or external texture to sample. * s The sampler type. * coords The texture coordinates used for sampling. -` - ) - .paramsSubcasesOnly(u => - u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(1)) - ) - .unimplemented(); - -g.test('sampled_2d_coords') - .specURL('https://www.w3.org/TR/WGSL/#texturesample') - .desc( - ` -fn textureSample(t: texture_2d, s: sampler, coords: vec2) -> vec4 -fn textureSample(t: texture_2d, s: sampler, coords: vec2, offset: vec2) -> vec4 - -Parameters: - * t The sampled, depth, or external texture to sample. - * s The sampler type. - * coords The texture coordinates used for sampling. - * offset - * The optional texel offset applied to the unnormalized texture coordinate before sampling the texture. - * This offset is applied before applying any texture wrapping modes. - * The offset expression must be a creation-time expression (e.g. vec2(1, 2)). - * Each offset component must be at least -8 and at most 7. - Values outside of this range will result in a shader-creation error. 
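// Note (illustrative sketch, not part of the patch): the kShortAddressModes /
// kShortAddressModeToAddressMode imports added above let the parameterizations below spell
// address modes with one-letter codes, which keeps the generated test-case names short.
// Assumed shape of that mapping (the real definition is in texture_utils.ts and may use
// different keys):
const kShortAddressModesSketch = ['c', 'r', 'm'] as const;
const kShortAddressModeToAddressModeSketch: Record<
  (typeof kShortAddressModesSketch)[number],
  GPUAddressMode
> = {
  c: 'clamp-to-edge',
  r: 'repeat',
  m: 'mirror-repeat',
};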
` ) .params(u => u .combine('format', kTestableColorFormats) + .filter(t => textureDimensionAndFormatCompatible('1d', t.format)) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('samplePoints', kSamplePointMethods) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) .beginSubcases() - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) + .combine('samplePoints', kSamplePointMethods) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, samplePoints, modeU, filt: minFilter } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '1d' }); const descriptor: GPUTextureDescriptor = { format, - size: { width, height }, + dimension: '1d', + size, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], minFilter, magFilter: minFilter, }; - const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + const calls: TextureCall[] = generateTextureBuiltinInputs1D(50, { sampler, method: samplePoints, descriptor, - offset: true, - hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset], - }).map(({ coords, offset }) => { + derivatives: true, + hashInputs: [format, samplePoints, modeU, minFilter], + }).map(({ coords, derivativeMult }) => { return { builtin: 'textureSample', coordType: 'f', coords, - offset, + derivativeMult, }; }); const viewDescriptor = {}; + const textureType = 'texture_1d'; const results = await doTextureCalls( t, texture, viewDescriptor, - 'texture_2d', + textureType, sampler, - calls + calls, + 'f' ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, - 'texture_2d', + textureType, sampler, calls, - results + results, + 'f', + texture ); t.expectOK(res); }); -g.test('sampled_2d_coords,derivatives') +g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` fn textureSample(t: texture_2d, s: sampler, coords: vec2) -> vec4 fn textureSample(t: texture_2d, s: sampler, coords: vec2, offset: vec2) -> vec4 -test mip level selection based on derivatives - ` +Parameters: + * t The sampled, depth, or external texture to sample. + * s The sampler type. + * coords The texture coordinates used for sampling. + * offset + * The optional texel offset applied to the unnormalized texture coordinate before sampling the texture. + * This offset is applied before applying any texture wrapping modes. + * The offset expression must be a creation-time expression (e.g. vec2(1, 2)). + * Each offset component must be at least -8 and at most 7. + Values outside of this range will result in a shader-creation error. 
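// Note (illustrative sketch, not part of the patch): the inputs above are generated with
// `derivatives: true`, and each call now carries a `derivativeMult` that scales the implicit
// derivatives (and therefore the selected mip level) for that call; this subsumes the old
// `sampled_2d_coords,derivatives` test that is removed below. Rough shape of one call record,
// using only field names that appear in this patch (the real TextureCall type in
// texture_utils.ts has more optional members):
interface TextureCallSketch {
  builtin: 'textureSample';
  coordType: 'f';
  coords: number[]; // normalized texture coordinates
  derivativeMult?: number[]; // per-axis scale on the implicit derivatives
  offset?: number[]; // optional texel offset, each component in [-8, 7]
}
const exampleCall: TextureCallSketch = {
  builtin: 'textureSample',
  coordType: 'f',
  coords: [0.25, 0.75],
  derivativeMult: [2, 2], // log2(2) = 1, so this roughly targets mip level 1
  offset: [1, -1],
};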
+` ) .params(u => u .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('mipmapFilter', ['nearest', 'linear'] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() - // note: this is the derivative we want at sample time. It is not the value - // passed directly to the shader. This way if we change the texture size - // or render target size we can compute the correct values to achieve the - // same results. - .combineWithParams([ - { ddx: 0.5, ddy: 0.5 }, // test mag filter - { ddx: 1, ddy: 1 }, // test level 0 - { ddx: 2, ddy: 1 }, // test level 1 via ddx - { ddx: 1, ddy: 4 }, // test level 2 via ddy - { ddx: 1.5, ddy: 1.5 }, // test mix between 1 and 2 - { ddx: 6, ddy: 6 }, // test mix between 2 and 3 (there is no 3 so we should get just 2) - { ddx: 1.5, ddy: 1.5, offset: [7, -8] as const }, // test mix between 1 and 2 with offset - { ddx: 1.5, ddy: 1.5, offset: [3, -3] as const }, // test mix between 1 and 2 with offset - { ddx: 1.5, ddy: 1.5, uvwStart: [-3.5, -4] as const }, // test mix between 1 and 2 with negative coords - ]) - ) - .beforeAllSubcases(t => - skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) + .combine('samplePoints', kSamplePointMethods) ) + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + }) .fn(async t => { - const { format, mipmapFilter, ddx, ddy, uvwStart, offset } = t.params; + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. 
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); const descriptor: GPUTextureDescriptor = { format, - mipLevelCount: 3, size: { width, height }, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, }; - + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: 'repeat', - addressModeV: 'repeat', - minFilter: 'linear', - magFilter: 'linear', - mipmapFilter, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + derivatives: true, + offset: true, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + offset, + }; + }); const viewDescriptor = {}; - await putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer( + const textureType = 'texture_2d'; + const results = await doTextureCalls( t, - descriptor, + texture, viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, sampler, - { ddx, ddy, uvwStart, offset } + calls, + results, + 'f', + texture ); + t.expectOK(res); }); g.test('sampled_3d_coords') @@ -235,17 +251,17 @@ Parameters: u .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('viewDimension', ['3d', 'cube'] as const) - .filter(t => !isCompressedTextureFormat(t.format) || t.viewDimension === 'cube') - .combine('samplePoints', kCubeSamplePointMethods) - .filter(t => t.samplePoints !== 'cube-edges' || t.viewDimension !== '3d') - .beginSubcases() - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeW', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) .combine('offset', [false, true] as const) - .filter(t => t.viewDimension !== 'cube' || t.offset !== true) + .filter(t => t.dim !== 'cube' || t.offset !== true) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) @@ -253,85 +269,93 @@ Parameters: .fn(async t => { const { format, - viewDimension, + dim: viewDimension, samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, + modeU, + modeV, + modeW, + filt: minFilter, offset, } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); - const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); - + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, 
viewDimension }); const descriptor: GPUTextureDescriptor = { format, dimension: viewDimension === '3d' ? '3d' : '2d', ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), - size: { width, height, depthOrArrayLayers }, + size, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by just always setting this to 3. + mipLevelCount: viewDimension === '3d' ? 3 : 1, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, - addressModeW, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], minFilter, magFilter: minFilter, + mipmapFilter: minFilter, }; + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; const calls: TextureCall[] = ( viewDimension === '3d' ? generateTextureBuiltinInputs3D(50, { method: samplePoints as SamplePointMethods, sampler, descriptor, - hashInputs: [ - format, - viewDimension, - samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, - offset, - ], + derivatives: true, + hashInputs, }) : generateSamplePointsCube(50, { method: samplePoints, sampler, descriptor, - hashInputs: [ - format, - viewDimension, - samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, - ], + derivatives: true, + hashInputs, }) - ).map(({ coords, offset }) => { + ).map(({ coords, derivativeMult, offset }) => { return { builtin: 'textureSample', coordType: 'f', coords, + derivativeMult, offset, }; }); const viewDescriptor = { dimension: viewDimension, }; - const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + 'f', + texture ); t.expectOK(res); }); @@ -355,22 +379,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + derivatives: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + offset, + }; + }); + + const viewDescriptor = {}; + const textureType = 'texture_depth_2d'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: C) -> vec4 -fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> vec4 +fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: A) -> vec4 +fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> vec4 Parameters: * t The sampled, depth, or external texture to sample. @@ -385,24 +476,94 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureSample(t: texture_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * t The sampled, depth, or external texture to sample. @@ -411,16 +572,90 @@ Parameters: * array_index The 0-based texture array index to sample. ` ) - .paramsSubcasesOnly( - u => - u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') @@ -434,21 +669,106 @@ Parameters: * coords The texture coordinates used for sampling. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combineWithParams([ + { viewDimension: 'cube' }, + { viewDimension: 'cube-array', A: 'i32' }, + { viewDimension: 'cube-array', A: 'u32' }, + ] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported(t.params.viewDimension); + }) + .fn(async t => { + const { format, viewDimension, samplePoints, A, mode, filt: minFilter } = t.params; + + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. + mipLevelCount: 1, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: A ? 
{ num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [format, viewDimension, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A ? (A === 'i32' ? 'i' : 'u') : undefined, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = + viewDimension === 'cube' ? 'texture_depth_cube' : 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C) -> f32 -fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> f32 +fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A) -> f32 +fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> f32 Parameters: * t The sampled, depth, or external texture to sample. @@ -463,24 +783,92 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('L', ['i32', 'u32'] as const) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, mode, filt: minFilter, A, L, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
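// Note (illustrative sketch, not part of the patch): for cube-array views the tests above
// pass `arrayIndex: { num: texture.depthOrArrayLayers / 6, ... }` because a cube-array
// texture uses six 2D layers per cube, so the valid array_index range is [0, layers / 6 - 1]:
function cubeArrayIndexCountSketch(texture: { depthOrArrayLayers: number }): number {
  return texture.depthOrArrayLayers / 6; // e.g. 24 layers -> array indices 0..3
}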
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [format, samplePoints, mode, minFilter, L, A, offset], + }).map(({ coords, derivativeMult, arrayIndex, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor: GPUTextureViewDescriptor = { dimension: '2d-array' }; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: C) -> f32 +fn textureSample(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A) -> f32 Parameters: * t The sampled, depth, or external texture to sample. @@ -489,13 +877,90 @@ Parameters: * array_index The 0-based texture array index to sample. ` ) - .paramsSubcasesOnly( - u => - u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. 
+ mipLevelCount: 1, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: A ? { num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [format, viewDimension, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A ? (A === 'i32' ? 'i' : 'u') : undefined, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts index 452c3b4df710..b14297876995 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts @@ -13,6 +13,9 @@ import { doTextureCalls, generateTextureBuiltinInputs2D, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, TextureCall, vec2, WGSLTextureSampleTest, @@ -54,15 +57,22 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('textureType', ['texture_2d', 'texture_external'] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) + ) + .beforeAllSubcases(t => + t.skipIf( + t.params.textureType === 'texture_external' && typeof VideoFrame === 'undefined', + 'VideoFrames are not supported' + ) ) .fn(async t => { - const { textureType, samplePoints, addressModeU, addressModeV, minFilter } = t.params; + const { textureType, stage, samplePoints, modeU, modeV, filt: minFilter } = t.params; const descriptor: GPUTextureDescriptor = { format: 'rgba8unorm', @@ -79,8 +89,8 @@ Parameters: ); try { const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -90,7 +100,7 @@ Parameters: method: samplePoints, sampler, descriptor, - hashInputs: [samplePoints, addressModeU, addressModeV, minFilter], + hashInputs: [samplePoints, modeU, modeV, minFilter], }).map(({ coords }) => { return { builtin: 
'textureSampleBaseClampToEdge', @@ -99,14 +109,23 @@ Parameters: }; }); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage ); t.expectOK(res); } finally { diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts index 1c61c1a5f217..f49322f878d6 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts @@ -2,14 +2,42 @@ export const description = ` Execution tests for the 'textureSampleBias' builtin function Samples a texture with a bias to the mip level. + +- TODO: test cube maps with more than one mip level. +- TODO: Test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompressedTextureFormats, kEncodableTextureFormats } from '../../../../../format_info.js'; +import { TextureTestMixin } from '../../../../../gpu_test.js'; + +import { + vec2, + vec3, + TextureCall, + generateTextureBuiltinInputs2D, + generateTextureBuiltinInputs3D, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + doTextureCalls, + checkCallResults, + createTextureWithRandomDataAndGetTexels, + generateSamplePointsCube, + kCubeSamplePointMethods, + SamplePointMethods, + chooseTextureSize, + isPotentiallyFilterableAndFillable, + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, + getTextureTypeForTextureViewDimension, + WGSLTextureSampleTest, + isSupportedViewFormatCombo, + skipIfNeedsFilteringAndIsUnfilterable, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(TextureTestMixin(WGSLTextureSampleTest)); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') @@ -31,14 +59,82 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + bias: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + bias, + offset, + }; + }); + const viewDescriptor = {}; + const textureType = 'texture_2d'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') @@ -63,23 +159,126 @@ Parameters: ) .params(u => u - .combine('texture_type', ['texture_3d', 'texture_cube'] as const) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) .beginSubcases() - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(3)) + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { + format, + dim: viewDimension, + samplePoints, + modeU, + modeV, + modeW, + filt: minFilter, + offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const descriptor: GPUTextureDescriptor = { + format, + dimension: viewDimension === '3d' ? '3d' : '2d', + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. + mipLevelCount: viewDimension === '3d' ? 
3 : 1, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], + minFilter, + magFilter: minFilter, + }; + + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; + const calls: TextureCall[] = ( + viewDimension === '3d' + ? generateTextureBuiltinInputs3D(50, { + method: samplePoints as SamplePointMethods, + sampler, + descriptor, + bias: true, + offset, + hashInputs, + }) + : generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + bias: true, + hashInputs, + }) + ).map(({ coords, derivativeMult, offset, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + bias, + offset, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, bias: f32) -> vec4 -fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, bias: f32, offset: vec2) -> vec4 +fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, bias: f32) -> vec4 +fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, bias: f32, offset: vec2) -> vec4 Parameters: * t: The sampled texture to read from @@ -95,25 +294,95 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + bias: true, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, bias, offset }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + bias, + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureSampleBias(t: texture_cube_array, s: sampler, coords: vec3, array_index: C, bias: f32) -> vec4 +fn textureSampleBias(t: texture_cube_array, s: sampler, coords: vec3, array_index: A, bias: f32) -> vec4 Parameters: * t: The sampled texture to read from @@ -129,13 +398,88 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + bias: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + bias, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts index eae5098257e6..27e55a8b189b 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts @@ -1,13 +1,37 @@ export const description = ` Samples a depth texture and compares the sampled depth values against a reference value. + +- TODO: test cube maps with more than 1 mip level. +- TODO: test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') @@ -18,7 +42,7 @@ fn textureSampleCompare(t: texture_depth_2d, s: sampler_comparison, coords: vec2 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. * offset @@ -29,14 +53,86 @@ Parameters: Values outside of this range will result in a shader-creation error. 
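// Note (illustrative sketch, not part of the patch): the textureSampleCompare tests below
// combine every entry of kCompareFunctions. For a single texel, a comparison sample evaluates
// `depth_ref <op> storedDepth` and yields 0 or 1; with linear filtering the per-texel results
// are then filtered together. Reference sketch of the per-texel step:
function compareTexelSketch(op: GPUCompareFunction, depthRef: number, stored: number): number {
  switch (op) {
    case 'never':
      return 0;
    case 'less':
      return depthRef < stored ? 1 : 0;
    case 'equal':
      return depthRef === stored ? 1 : 0;
    case 'less-equal':
      return depthRef <= stored ? 1 : 0;
    case 'greater':
      return depthRef > stored ? 1 : 0;
    case 'not-equal':
      return depthRef !== stored ? 1 : 0;
    case 'greater-equal':
      return depthRef >= stored ? 1 : 0;
    default:
      return 1; // 'always'
  }
}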
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, compare, offset } = t.params; + + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompare', + sampler, + descriptor, + derivatives: true, + depthRef: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') @@ -46,31 +142,106 @@ fn textureSampleCompare(t: texture_depth_cube, s: sampler_comparison, coords: ve Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 once derivatives with cube maps are supported + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + depthRef: true, + textureBuiltin: 'textureSampleCompare', + hashInputs: [format, samplePoints, mode, minFilter, compare], + }).map(({ coords, derivativeMult, depthRef }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> f32 -fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> f32 +fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> f32 +fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. @@ -82,41 +253,197 @@ Parameters: Values outside of this range will result in a shader-creation error. 
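// Note (illustrative sketch, not part of the patch): each of these comparison tests fills its
// depth texture via makeRandomDepthComparisonTexelGenerator(descriptor, compare); the intent,
// assumed from the name and usage, is that the chosen compare function sees both passing and
// failing texels. A minimal stand-in with the same intent:
function randomDepthTexelSketch(rand: () => number): number {
  // Quantize to a few levels so 'equal' / 'not-equal' also hit both outcomes.
  const kLevels = 8;
  return Math.floor(rand() * kLevels) / (kLevels - 1);
}
const exampleDepthTexel = randomDepthTexelSketch(Math.random);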
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, compare, offset } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompare', + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> f32 +fn textureSampleCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. 
* array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 once derivatives with cube maps are supported + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + textureBuiltin: 'textureSampleCompare', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [format, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, depthRef, arrayIndex }) => { + return { + builtin: 'textureSampleCompare', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts index 500df8a6ecaa..61d093a638cd 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts @@ -7,34 +7,38 @@ The textureSampleCompareLevel function is the same as textureSampleCompare, exce * The function does not compute derivatives. * There is no requirement for textureSampleCompareLevel to be invoked in uniform control flow. * textureSampleCompareLevel may be invoked in any shader stage. + +- TODO: test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; - -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -export const g = makeTestGroup(GPUTest); +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -g.test('stage') - .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') - .desc( - ` -Tests that 'textureSampleCompareLevel' maybe called in any shader stage. -` - ) - .params(u => u.combine('stage', ['fragment', 'vertex', 'compute'] as const)) - .unimplemented(); - -g.test('control_flow') - .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') - .desc( - ` -Tests that 'textureSampleCompareLevel' maybe called in non-uniform control flow. -` - ) - .params(u => u.combine('stage', ['fragment', 'vertex', 'compute'] as const)) - .unimplemented(); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') @@ -45,7 +49,7 @@ fn textureSampleCompareLevel(t: texture_depth_2d, s: sampler_comparison, coords: Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. * offset @@ -56,14 +60,96 @@ Parameters: Values outside of this range will result in a shader-creation error. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { + format, + stage, + samplePoints, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompareLevel', + sampler, + descriptor, + derivatives: true, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') @@ -73,31 +159,107 @@ fn textureSampleCompareLevel(t: texture_depth_cube, s: sampler_comparison, coord Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + depthRef: true, + textureBuiltin: 'textureSampleCompareLevel', + hashInputs: [stage, format, samplePoints, mode, minFilter, compare], + }).map(({ coords, derivativeMult, depthRef }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> f32 -fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> f32 +fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> f32 +fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. 
* array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. @@ -109,41 +271,207 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { + format, + stage, + samplePoints, + A, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompareLevel', + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompareLevel(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> f32 +fn textureSampleCompareLevel(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
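Aside on the cube-array cases in this hunk: a cube-array view consumes 6 array layers per cube, which is why these tests pass `arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }`, and the `skipIfTextureViewDimensionNotSupported('cube-array')` guard reflects that some configurations (e.g. compatibility mode) may not expose cube-array views. Illustrative helper only:

// Number of addressable cubes in a cube-array texture.
// Assumes the layer count is a multiple of 6, as required for cube-array views.
function cubeArrayCubeCount(depthOrArrayLayers: number): number {
  return depthOrArrayLayers / 6;
}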
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + textureBuiltin: 'textureSampleCompareLevel', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, depthRef, arrayIndex }) => { + return { + builtin: 'textureSampleCompareLevel', + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts index e0d754ece391..8da6ffdfe9c1 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts @@ -1,13 +1,42 @@ export const description = ` Samples a texture using explicit gradients. + +- TODO: test cube maps with more than one mip level. +- TODO: Test un-encodable formats. 
`; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompressedTextureFormats, kEncodableTextureFormats } from '../../../../../format_info.js'; + +import { + appendComponentTypeForFormatToTextureType, + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + generateTextureBuiltinInputs3D, + getTextureTypeForTextureViewDimension, + isPotentiallyFilterableAndFillable, + isSupportedViewFormatCombo, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + SamplePointMethods, + skipIfNeedsFilteringAndIsUnfilterable, + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -30,13 +59,82 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) + ) + .fn(async t => { + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
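Aside, expanding on the sizing comment above: a full mip chain has 1 + floor(log2(max(width, height))) levels, so a minimum extent of 8 texels (8 → 4 → 2) is enough for the `mipLevelCount: 3` these tests request, and `minBlocks: 4` presumably keeps compressed formats several blocks wide at the base level. Sketch only, not the chooseTextureSize implementation:

function fullMipLevelCount(width: number, height: number): number {
  return 1 + Math.floor(Math.log2(Math.max(width, height)));
}
// e.g. fullMipLevelCount(8, 8) === 4 >= 3, so a 3-level chain always fits.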
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, offset, ddx, ddy }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -60,13 +158,121 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('offset', generateOffsets(3)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { + format, + dim: viewDimension, + stage, + samplePoints, + modeU, + modeV, + modeW, + filt: minFilter, + offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const descriptor: GPUTextureDescriptor = { + format, + dimension: viewDimension === '3d' ? '3d' : '2d', + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. + mipLevelCount: viewDimension === '3d' ? 
3 : 1, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], + minFilter, + magFilter: minFilter, + }; + + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; + const calls: TextureCall[] = ( + viewDimension === '3d' + ? generateTextureBuiltinInputs3D(50, { + method: samplePoints as SamplePointMethods, + sampler, + descriptor, + grad: true, + offset, + hashInputs, + }) + : generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + hashInputs, + }) + ).map(({ coords, offset, ddx, ddy }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + offset, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -92,16 +298,88 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
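Aside: textureSampleGrad selects its mip level from the explicit gradients rather than from implicit derivatives. A rough sketch of the usual mapping is below (scale the gradients from normalized coordinates to texels, take the larger footprint, use its log2 as the LOD; clamping to the available mip range omitted). Implementations are permitted some tolerance here, which is why a software checker typically accepts a range of results rather than a single value. Names are illustrative, not CTS code.

function approximateLodFromGradients(
  ddx: readonly [number, number],
  ddy: readonly [number, number],
  textureSize: readonly [number, number]
): number {
  // Footprint of each gradient in texel space.
  const dx = Math.hypot(ddx[0] * textureSize[0], ddx[1] * textureSize[1]);
  const dy = Math.hypot(ddy[0] * textureSize[0], ddy[1] * textureSize[1]);
  return Math.log2(Math.max(dx, dy));
}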
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + grad: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, ddx, ddy, arrayIndex, offset }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -126,11 +404,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, stage, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, ddx, ddy, arrayIndex }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts index 729563553260..840bafcab223 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts @@ -1,24 +1,11 @@ export const description = ` Samples a texture. -Must only be used in a fragment shader stage. -Must only be invoked in uniform control flow. - - TODO: Test un-encodable formats. -- TODO: set mipLevelCount to 3 for cubemaps. See MAINTENANCE_TODO below - - The issue is sampling a corner of a cubemap is undefined. We try to quantize coordinates - so we never get a corner but when sampling smaller mip levels that's more difficult unless we make the textures - larger. Larger is slower. - - Solution 1: Fix the quantization - Solution 2: special case checking cube corners. Expect some value between the color of the 3 corner texels. 
- `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { - isCompressedTextureFormat, isDepthTextureFormat, isEncodableTextureFormat, kCompressedTextureFormats, @@ -38,9 +25,14 @@ import { getDepthOrArrayLayersForViewDimension, getTextureTypeForTextureViewDimension, isPotentiallyFilterableAndFillable, + isSupportedViewFormatCombo, kCubeSamplePointMethods, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, SamplePointMethods, + skipIfNeedsFilteringAndIsUnfilterable, skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, TextureCall, vec2, @@ -78,20 +70,22 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -103,8 +97,8 @@ Parameters: }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -116,7 +110,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, offset, - hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset], + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], }).map(({ coords, mipLevel, offset }) => { return { builtin: 'textureSampleLevel', @@ -129,14 +123,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -170,21 +174,23 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) 
.combine('A', ['i32', 'u32'] as const) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, A, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -198,8 +204,8 @@ Parameters: }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -212,7 +218,7 @@ Parameters: mipLevel: { num: texture.mipLevelCount, type: 'f32' }, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, offset, - hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset], + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], }).map(({ coords, mipLevel, arrayIndex, offset }) => { return { builtin: 'textureSampleLevel', @@ -227,14 +233,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -266,25 +282,35 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('viewDimension', ['3d', 'cube'] as const) - .filter(t => !isCompressedTextureFormat(t.format) || t.viewDimension === 'cube') + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) - .filter(t => t.samplePoints !== 'cube-edges' || t.viewDimension !== '3d') - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) - .filter(t => t.viewDimension !== 'cube' || t.offset !== true) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, viewDimension, samplePoints, addressMode, minFilter, offset } = t.params; + const { + format, + dim: viewDimension, + stage, + samplePoints, + mode, + filt: minFilter, + 
offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const [width, height] = chooseTextureSize({ minSize: 32, minBlocks: 2, format, viewDimension }); const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); const descriptor: GPUTextureDescriptor = { @@ -293,19 +319,19 @@ Parameters: ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), size: { width, height, depthOrArrayLayers }, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: make mipLevelCount always 3 - mipLevelCount: viewDimension === 'cube' ? 1 : 3, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, }; + const hashInputs = [stage, format, viewDimension, samplePoints, mode, minFilter, offset]; const calls: TextureCall[] = ( viewDimension === '3d' ? generateTextureBuiltinInputs3D(50, { @@ -314,14 +340,14 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, offset, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter, offset], + hashInputs, }) : generateSamplePointsCube(50, { method: samplePoints, sampler, descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter, offset], + hashInputs, }) ).map(({ coords, mipLevel, offset }) => { return { @@ -337,14 +363,24 @@ Parameters: dimension: viewDimension, }; const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -379,24 +415,26 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) .combine('A', ['i32', 'u32'] as const) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) ) .beforeAllSubcases(t => { skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); t.skipIfTextureViewDimensionNotSupported('cube-array'); }) .fn(async t => { - const { format, samplePoints, A, addressMode, minFilter } = t.params; + const { format, stage, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); const viewDimension: GPUTextureViewDimension = 'cube-array'; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -405,14 +443,13 @@ Parameters: format, size, usage: 
GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: Set this to 3. See above. - mipLevelCount: 1, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -423,8 +460,8 @@ Parameters: sampler, descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, - arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, viewDimension, A, samplePoints, mode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureSampleLevel', @@ -440,14 +477,24 @@ Parameters: dimension: viewDimension, }; const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -456,7 +503,7 @@ g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L, offset: vec2) -> f32 @@ -480,23 +527,24 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) .combine('L', ['i32', 'u32'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressMode, minFilter, L, offset } = t.params; + const { format, stage, samplePoints, mode, filt: minFilter, L, offset } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
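Aside on the abbreviated parameter names used throughout these hunks: 'filt', 'modeU'/'modeV'/'mode' ('c' | 'r' | 'm') and 'stage' ('c' | 'f' | 'v') keep the generated case names short (see kShortAddressModeToAddressMode and kShortShaderStages in texture_utils.ts further down). An illustrative expansion into a sampler descriptor, assuming the same one-letter keys; the helper itself is hypothetical:

const kShortModeToAddressMode = {
  c: 'clamp-to-edge',
  r: 'repeat',
  m: 'mirror-repeat',
} as const;

// Builds the sampler used by most of these tests: one filter shared by min/mag/mipmap.
function samplerFromShortParams(
  modeU: keyof typeof kShortModeToAddressMode,
  modeV: keyof typeof kShortModeToAddressMode,
  filt: GPUFilterMode
): GPUSamplerDescriptor {
  return {
    addressModeU: kShortModeToAddressMode[modeU],
    addressModeV: kShortModeToAddressMode[modeV],
    minFilter: filt,
    magFilter: filt,
    mipmapFilter: filt,
  };
}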
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -504,15 +552,12 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -524,7 +569,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, offset, - hashInputs: [format, samplePoints, addressMode, minFilter, L, offset], + hashInputs: [stage, format, samplePoints, mode, minFilter, L, offset], }).map(({ coords, mipLevel, offset }) => { return { builtin: 'textureSampleLevel', @@ -537,14 +582,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_depth_2d', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -553,7 +608,8 @@ g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L, offset: vec2) -> f32 @@ -578,24 +634,25 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) .combine('A', ['i32', 'u32'] as const) .combine('L', ['i32', 'u32'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressMode, minFilter, A, L, offset } = t.params; + const { format, stage, samplePoints, mode, filt: minFilter, A, L, offset } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
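Aside: the f32-level variants of textureSampleLevel above blend between adjacent mips when mipmapFilter is 'linear', while the integer-level variants in this hunk address a single level. A simplified sketch of the blend (clamping details elided; not the CTS reference implementation):

function blendMipLevels(
  level: number,
  mipLevelCount: number,
  sampleLevel: (lvl: number) => number
): number {
  const clamped = Math.min(Math.max(level, 0), mipLevelCount - 1);
  const lo = Math.floor(clamped);
  const hi = Math.min(lo + 1, mipLevelCount - 1);
  const t = clamped - lo;
  // Linear mix of the two nearest levels; with 'nearest' only the closer level would be used.
  return sampleLevel(lo) * (1 - t) + sampleLevel(hi) * t;
}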
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -603,16 +660,13 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -625,7 +679,7 @@ Parameters: arrayIndex: { num: texture.depthOrArrayLayers, type: A }, mipLevel: { num: texture.mipLevelCount, type: L }, offset, - hashInputs: [format, samplePoints, addressMode, minFilter, L, A, offset], + hashInputs: [stage, format, samplePoints, mode, minFilter, L, A, offset], }).map(({ coords, mipLevel, arrayIndex, offset }) => { return { builtin: 'textureSampleLevel', @@ -640,14 +694,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_depth_2d_array', format); const viewDescriptor: GPUTextureViewDescriptor = { dimension: '2d-array' }; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -656,7 +720,8 @@ g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 +A is i32 or u32 fn textureSampleLevel(t: texture_depth_cube, s: sampler, coords: vec3, level: L) -> f32 fn textureSampleLevel(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A, level: L) -> f32 @@ -680,6 +745,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) @@ -690,21 +756,21 @@ Parameters: { viewDimension: 'cube-array', A: 'i32' }, { viewDimension: 'cube-array', A: 'u32' }, ] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) .combine('L', ['i32', 'u32'] as const) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) ) .beforeAllSubcases(t => { skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); t.skipIfTextureViewDimensionNotSupported(t.params.viewDimension); }) .fn(async t => { - const { format, viewDimension, samplePoints, A, L, addressMode, minFilter } = t.params; + const { format, stage, viewDimension, samplePoints, A, L, mode, filt: minFilter } = t.params; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -712,18 +778,15 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | 
GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: 3, ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -733,9 +796,9 @@ Parameters: method: samplePoints, sampler, descriptor, - mipLevel: { num: texture.mipLevelCount, type: L }, - arrayIndex: A ? { num: texture.depthOrArrayLayers, type: A } : undefined, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], + mipLevel: { num: texture.mipLevelCount - 1, type: L }, + arrayIndex: A ? { num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [stage, format, viewDimension, samplePoints, mode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureSampleLevel', @@ -752,7 +815,15 @@ Parameters: }; const textureType = viewDimension === 'cube' ? 'texture_depth_cube' : 'texture_depth_cube_array'; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, @@ -760,7 +831,9 @@ Parameters: textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts index 09b48b13ce63..e955b82ed603 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts @@ -743,6 +743,18 @@ g.test('out_of_bounds_array') return true; }) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.baseLevel !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.arrayLevels !== kArrayLevels, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const dim = '2d'; const view_dim = '2d-array'; diff --git a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts index e997833a137f..b01f3a5e758f 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts @@ -1,18 +1,18 @@ import { keysOf } from '../../../../../../common/util/data_tables.js'; import { assert, range, unreachable } from '../../../../../../common/util/util.js'; +import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; import { EncodableTextureFormat, isCompressedFloatTextureFormat, isCompressedTextureFormat, isDepthOrStencilTextureFormat, + isDepthTextureFormat, + isEncodableTextureFormat, + isStencilTextureFormat, kEncodableTextureFormats, kTextureFormatInfo, } from '../../../../../format_info.js'; -import { - GPUTest, - GPUTestSubcaseBatchState, - TextureTestMixinType, -} from '../../../../../gpu_test.js'; +import { GPUTest, GPUTestSubcaseBatchState } from '../../../../../gpu_test.js'; import { 
align, clamp, @@ -24,6 +24,7 @@ import { } from '../../../../../util/math.js'; import { effectiveViewDimensionForDimension, + physicalMipSize, physicalMipSizeFromTexture, reifyTextureDescriptor, SampleCoord, @@ -37,11 +38,28 @@ import { TexelComponent, TexelRepresentationInfo, } from '../../../../../util/texture/texel_data.js'; -import { TexelView } from '../../../../../util/texture/texel_view.js'; +import { PerPixelAtLevel, TexelView } from '../../../../../util/texture/texel_view.js'; import { createTextureFromTexelViews } from '../../../../../util/texture.js'; import { reifyExtent3D } from '../../../../../util/unions.js'; +import { ShaderStage } from '../../../../validation/decl/util.js'; -export type SampledType = 'f32' | 'i32' | 'u32'; +// These are needed because the list of parameters was too long when converted to a filename. +export const kShortShaderStageToShaderStage = { + c: 'compute' as ShaderStage, + f: 'fragment' as ShaderStage, + v: 'vertex' as ShaderStage, +} as const; +export const kShortShaderStages = keysOf(kShortShaderStageToShaderStage); +export type ShortShaderStage = (typeof kShortShaderStages)[number]; + +// These are needed because the list of parameters was too long when converted to a filename. +export const kShortAddressModeToAddressMode: Record = { + c: 'clamp-to-edge', + r: 'repeat', + m: 'mirror-repeat', +}; + +export const kShortAddressModes = keysOf(kShortAddressModeToAddressMode); export const kSampleTypeInfo = { f32: { @@ -55,6 +73,17 @@ export const kSampleTypeInfo = { }, } as const; +// MAINTENANCE_TODO: Stop excluding sliced compressed 3d formats. +export function isSupportedViewFormatCombo( + format: GPUTextureFormat, + viewDimension: GPUTextureViewDimension +) { + return !( + (isCompressedTextureFormat(format) || isDepthTextureFormat(format)) && + viewDimension === '3d' + ); +} + /** * Return the texture type for a given view dimension */ @@ -77,16 +106,68 @@ export function getTextureTypeForTextureViewDimension(viewDimension: GPUTextureV } } +const is32Float = (format: GPUTextureFormat) => + format === 'r32float' || format === 'rg32float' || format === 'rgba32float'; + /** - * Returns if a texture format can potentially be filtered and can be filled with random data. + * Skips a subcase if the filter === 'linear' and the format is type + * 'unfilterable-float' and we cannot enable filtering. 
*/ -export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) { - const type = kTextureFormatInfo[format].color?.type; - const canPotentiallyFilter = type === 'float' || type === 'unfilterable-float'; +export function skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice( + t: GPUTestSubcaseBatchState, + filter: GPUFilterMode, + format: GPUTextureFormat +) { + const features = new Set(); + features.add(kTextureFormatInfo[format].feature); + + if (filter === 'linear') { + t.skipIf(isDepthTextureFormat(format), 'depth texture are unfilterable'); + + const type = kTextureFormatInfo[format].color?.type; + if (type === 'unfilterable-float') { + assert(is32Float(format)); + features.add('float32-filterable'); + } + } + + if (features.size > 0) { + t.selectDeviceOrSkipTestCase(Array.from(features)); + } +} + +/** + * Skips a test if filter === 'linear' and the format is not filterable + */ +export function skipIfNeedsFilteringAndIsUnfilterable( + t: GPUTest, + filter: GPUFilterMode, + format: GPUTextureFormat +) { + if (filter === 'linear') { + t.skipIf(isDepthTextureFormat(format), 'depth textures are unfilterable'); + } +} + +/** + * Returns if a texture format can be filled with random data. + */ +export function isFillable(format: GPUTextureFormat) { // We can't easily put random bytes into compressed textures if they are float formats // since we want the range to be +/- 1000 and not +/- infinity or NaN. - const isFillable = !isCompressedTextureFormat(format) || !format.endsWith('float'); - return canPotentiallyFilter && isFillable; + return !isCompressedTextureFormat(format) || !format.endsWith('float'); +} + +/** + * Returns if a texture format can potentially be filtered and can be filled with random data. + */ +export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) { + const info = kTextureFormatInfo[format]; + const type = info.color?.type ?? info.depth?.type; + const canPotentiallyFilter = + type === 'float' || type === 'unfilterable-float' || type === 'depth'; + const result = canPotentiallyFilter && isFillable(format); + return result; } /** @@ -106,77 +187,380 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable( } /** - * Gets the mip gradient values for the current device. - * The issue is, different GPUs have different ways of mixing between mip levels. - * For most GPUs it's linear but for AMD GPUs on Mac in particular, it's something - * else (which AFAICT is against all the specs). 
+ * Splits in array into multiple arrays where every Nth value goes to a different array + */ +function unzip(array: T[], num: number) { + const arrays: T[][] = range(num, () => []); + array.forEach((v, i) => { + arrays[i % num].push(v); + }); + return arrays; +} + +type MipWeights = { + sampleLevelWeights?: number[]; + softwareMixToGPUMixGradWeights?: number[]; +}; +type MipWeightType = keyof MipWeights; + +function makeGraph(width: number, height: number) { + const data = new Uint8Array(width * height); + + return { + plot(norm: number, x: number, c: number) { + const y = clamp(Math.floor(norm * height), { min: 0, max: height - 1 }); + const offset = (height - y - 1) * width + x; + data[offset] = c; + }, + plotValues(values: Iterable, c: number) { + let i = 0; + for (const v of values) { + this.plot(v, i, c); + ++i; + } + }, + toString(conversion = ['.', 'e', 'A']) { + const lines = []; + for (let y = 0; y < height; ++y) { + const offset = y * width; + lines.push([...data.subarray(offset, offset + width)].map(v => conversion[v]).join('')); + } + return lines.join('\n'); + }, + }; +} + +function* linear0to1OverN(n: number) { + for (let i = 0; i <= n; ++i) { + yield i / n; + } +} + +function graphWeights(height: number, weights: number[]) { + const graph = makeGraph(weights.length, height); + graph.plotValues(linear0to1OverN(weights.length - 1), 1); + graph.plotValues(weights, 2); + return graph.toString(); +} + +/** + * Validates the weights go from 0 to 1 in increasing order. + */ +function validateWeights(stage: string, weights: number[]) { + const showWeights = () => ` +${weights.map((v, i) => `${i.toString().padStart(2)}: ${v}`).join('\n')} + +e = expected +A = actual +${graphWeights(32, weights)} +`; + + // Validate the weights + assert( + weights[0] === 0, + `stage: ${stage}, weight 0 expected 0 but was ${weights[0]}\n${showWeights()}` + ); + assert( + weights[kMipGradientSteps] === 1, + `stage: ${stage}, top weight expected 1 but was ${weights[kMipGradientSteps]}\n${showWeights()}` + ); + + // Note: for 16 steps, these are the AMD weights + // + // standard + // step mipLevel gpu AMD + // ---- -------- -------- ---------- + // 0: 0 0 0 + // 1: 0.0625 0.0625 0 + // 2: 0.125 0.125 0.03125 + // 3: 0.1875 0.1875 0.109375 + // 4: 0.25 0.25 0.1875 + // 5: 0.3125 0.3125 0.265625 + // 6: 0.375 0.375 0.34375 + // 7: 0.4375 0.4375 0.421875 + // 8: 0.5 0.5 0.5 + // 9: 0.5625 0.5625 0.578125 + // 10: 0.625 0.625 0.65625 + // 11: 0.6875 0.6875 0.734375 + // 12: 0.75 0.75 0.8125 + // 13: 0.8125 0.8125 0.890625 + // 14: 0.875 0.875 0.96875 + // 15: 0.9375 0.9375 1 + // 16: 1 1 1 + // + // notice step 1 is 0 and step 15 is 1. + // so we only check the 1 through 14. + // + // Note: these 2 changes are effectively here to catch Intel Mac + // issues and require implementations to work around them. 
+ // + // Ideally the weights should form a straight line + // + // +----------------+ + // | **| + // | ** | + // | ** | + // | ** | + // | ** | + // | ** | + // | ** | + // |** | + // +----------------+ + // + // AMD Mac goes like this: Not great but we allow it + // + // +----------------+ + // | ***| + // | ** | + // | * | + // | ** | + // | ** | + // | * | + // | ** | + // |*** | + // +----------------+ + // + // Intel Mac goes like this: Unacceptable + // + // +----------------+ + // | *******| + // | * | + // | * | + // | * | + // | * | + // | * | + // | * | + // |******* | + // +----------------+ + // + const dx = 1 / kMipGradientSteps; + for (let i = 0; i < kMipGradientSteps; ++i) { + const dy = weights[i + 1] - weights[i]; + // dy / dx because dy might be 0 + const slope = dy / dx; + assert( + slope >= 0, + `stage: ${stage}, weight[${i}] was not <= weight[${i + 1}]\n${showWeights()}` + ); + assert( + slope <= 2, + `stage: ${stage}, slope from weight[${i}] to weight[${i + 1}] is > 2.\n${showWeights()}` + ); + } + + assert( + new Set(weights).size >= ((weights.length * 0.66) | 0), + `stage: ${stage}, expected more unique weights\n${showWeights()}` + ); +} + +/** + * In an attempt to pass on more devices without lowering the tolerances + * so low they are meaningless, we ask the hardware to tell us, for a given + * gradient, level, what mix weights are being used. * - * We seemingly have 3 options: + * This is done by drawing instanced quads and using instance_index to + * write out results into an array. We sample a 2x2 pixel texture with + * 2 mip levels and set the 2nd mip level to white. This means the value + * we get back represents the weight used to mix the 2 mip levels. * - * 1. Increase the tolerances of tests so they pass on AMD. - * 2. Mark AMD as failing - * 3. Try to figure out how the GPU converts mip levels into weights + * Just as a record of some differences across GPUs * - * We're doing 3. + * level weights: mapping from the mip level + * parameter of `textureSampleLevel` to + * the mix weight used by the GPU * - * There's an assumption that the gradient will be the same for all formats - * and usages. 
+ * +--------+--------+--------+--------+ + * | | | intel | amd | + * | | m1 | gen-9 | rna-1 | + * | level | mac | mac | mac | + * +--------+--------+--------+--------+ + * | 0.0000 | 0.0000 | 0.0000 | 0.0000 | + * | 0.0313 | 0.0314 | 0.0313 | 0.0000 | + * | 0.0625 | 0.0625 | 0.0625 | 0.0000 | + * | 0.0938 | 0.0939 | 0.0938 | 0.0000 | + * | 0.1250 | 0.1250 | 0.1250 | 0.0313 | + * | 0.1563 | 0.1564 | 0.1563 | 0.0703 | + * | 0.1875 | 0.1875 | 0.1875 | 0.1094 | + * | 0.2188 | 0.2189 | 0.2188 | 0.1484 | + * | 0.2500 | 0.2500 | 0.2500 | 0.1875 | + * | 0.2813 | 0.2814 | 0.2813 | 0.2266 | + * | 0.3125 | 0.3125 | 0.3125 | 0.2656 | + * | 0.3438 | 0.3439 | 0.3438 | 0.3047 | + * | 0.3750 | 0.3750 | 0.3750 | 0.3438 | + * | 0.4063 | 0.4064 | 0.4063 | 0.3828 | + * | 0.4375 | 0.4375 | 0.4375 | 0.4219 | + * | 0.4688 | 0.4689 | 0.4688 | 0.4609 | + * | 0.5000 | 0.5000 | 0.5000 | 0.5000 | + * | 0.5313 | 0.5314 | 0.5313 | 0.5391 | + * | 0.5625 | 0.5625 | 0.5625 | 0.5781 | + * | 0.5938 | 0.5939 | 0.5938 | 0.6172 | + * | 0.6250 | 0.6250 | 0.6250 | 0.6563 | + * | 0.6563 | 0.6564 | 0.6563 | 0.6953 | + * | 0.6875 | 0.6875 | 0.6875 | 0.7344 | + * | 0.7188 | 0.7189 | 0.7188 | 0.7734 | + * | 0.7500 | 0.7500 | 0.7500 | 0.8125 | + * | 0.7813 | 0.7814 | 0.7813 | 0.8516 | + * | 0.8125 | 0.8125 | 0.8125 | 0.8906 | + * | 0.8438 | 0.8439 | 0.8438 | 0.9297 | + * | 0.8750 | 0.8750 | 0.8750 | 0.9688 | + * | 0.9063 | 0.9064 | 0.9063 | 1.0000 | + * | 0.9375 | 0.9375 | 0.9375 | 1.0000 | + * | 0.9688 | 0.9689 | 0.9688 | 1.0000 | + * | 1.0000 | 1.0000 | 1.0000 | 1.0000 | + * +--------+--------+--------+--------+ + * + * grad weights: mapping from ddx value + * passed into `textureSampleGrad` to + * the mix weight used by the GPU + * + * +--------+--------+--------+--------+ + * | | | intel | amd | + * | | m1 | gen-9 | rna-1 | + * | ddx | mac | mac | mac | + * +--------+--------+--------+--------+ + * | 0.5000 | 0.0000 | 0.0000 | 0.0000 | + * | 0.5109 | 0.0390 | 0.0430 | 0.0000 | + * | 0.5221 | 0.0821 | 0.0859 | 0.0000 | + * | 0.5336 | 0.1211 | 0.1289 | 0.0352 | + * | 0.5453 | 0.1600 | 0.1719 | 0.0898 | + * | 0.5572 | 0.2032 | 0.2109 | 0.1328 | + * | 0.5694 | 0.2422 | 0.2461 | 0.1797 | + * | 0.5819 | 0.2814 | 0.2852 | 0.2305 | + * | 0.5946 | 0.3203 | 0.3203 | 0.2773 | + * | 0.6076 | 0.3554 | 0.3594 | 0.3164 | + * | 0.6209 | 0.3868 | 0.3906 | 0.3633 | + * | 0.6345 | 0.4218 | 0.4258 | 0.4063 | + * | 0.6484 | 0.4532 | 0.4609 | 0.4492 | + * | 0.6626 | 0.4882 | 0.4922 | 0.4883 | + * | 0.6771 | 0.5196 | 0.5234 | 0.5273 | + * | 0.6920 | 0.5507 | 0.5547 | 0.5664 | + * | 0.7071 | 0.5860 | 0.5859 | 0.6055 | + * | 0.7226 | 0.6132 | 0.6133 | 0.6406 | + * | 0.7384 | 0.6407 | 0.6445 | 0.6797 | + * | 0.7546 | 0.6679 | 0.6719 | 0.7148 | + * | 0.7711 | 0.6953 | 0.6992 | 0.7461 | + * | 0.7880 | 0.7225 | 0.7266 | 0.7813 | + * | 0.8052 | 0.7500 | 0.7539 | 0.8164 | + * | 0.8229 | 0.7814 | 0.7813 | 0.8516 | + * | 0.8409 | 0.8086 | 0.8086 | 0.8828 | + * | 0.8593 | 0.8321 | 0.8320 | 0.9141 | + * | 0.8781 | 0.8554 | 0.8594 | 0.9492 | + * | 0.8974 | 0.8789 | 0.8828 | 0.9766 | + * | 0.9170 | 0.9025 | 0.9063 | 1.0000 | + * | 0.9371 | 0.9297 | 0.9297 | 1.0000 | + * | 0.9576 | 0.9532 | 0.9531 | 1.0000 | + * | 0.9786 | 0.9765 | 0.9766 | 1.0000 | + * | 1.0000 | 1.0000 | 1.0000 | 1.0000 | + * +--------+--------+--------+--------+ */ -const kMipGradientSteps = 16; -const s_deviceToMipGradientValues = new WeakMap(); -async function initMipGradientValuesForDevice(t: GPUTest) { + +async function queryMipGradientValuesForDevice(t: GPUTest, stage: ShaderStage) { const { 
device } = t; - const weights = s_deviceToMipGradientValues.get(device); - if (!weights) { - const module = device.createShaderModule({ - code: ` - @group(0) @binding(0) var tex: texture_2d; - @group(0) @binding(1) var smp: sampler; - @group(0) @binding(2) var result: array; + const kNumWeightTypes = 2; + const module = device.createShaderModule({ + code: ` + @group(0) @binding(0) var tex: texture_2d; + @group(0) @binding(1) var smp: sampler; + @group(0) @binding(2) var result: array; + + struct VSOutput { + @builtin(position) pos: vec4f, + @location(0) @interpolate(flat, either) ndx: u32, + @location(1) @interpolate(flat, either) result: vec4f, + }; - @compute @workgroup_size(1) fn cs(@builtin(global_invocation_id) id: vec3u) { - let mipLevel = f32(id.x) / ${kMipGradientSteps}; - result[id.x] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r; + fn getMixLevels(wNdx: u32) -> vec4f { + let mipLevel = f32(wNdx) / ${kMipGradientSteps}; + let size = textureDimensions(tex); + let g = mix(1.0, 2.0, mipLevel) / f32(size.x); + let ddx = vec2f(g, 0); + return vec4f( + textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r, + textureSampleGrad(tex, smp, vec2f(0.5), ddx, vec2f(0)).r, + 0, + 0); + } + + fn recordMixLevels(wNdx: u32, r: vec4f) { + let ndx = wNdx * ${kNumWeightTypes}; + for (var i: u32 = 0; i < ${kNumWeightTypes}; i++) { + result[ndx + i] = r[i]; } - `, - }); + } - const pipeline = device.createComputePipeline({ - layout: 'auto', - compute: { module }, - }); + fn getPosition(vNdx: u32) -> vec4f { + let pos = array( + vec2f(-1, 3), + vec2f( 3, -1), + vec2f(-1, -1), + ); + let p = pos[vNdx]; + return vec4f(p, 0, 1); + } - const texture = t.createTextureTracked({ - size: [2, 2, 1], - format: 'r8unorm', - usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, - mipLevelCount: 2, - }); + @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { + return VSOutput(getPosition(vNdx), iNdx, vec4f(0)); + } - device.queue.writeTexture( - { texture, mipLevel: 1 }, - new Uint8Array([255]), - { bytesPerRow: 1 }, - [1, 1] - ); + @fragment fn fsRecord(v: VSOutput) -> @location(0) vec4f { + recordMixLevels(v.ndx, getMixLevels(v.ndx)); + return vec4f(0); + } - const sampler = device.createSampler({ - minFilter: 'linear', - magFilter: 'linear', - mipmapFilter: 'linear', - }); + @compute @workgroup_size(1) fn csRecord(@builtin(global_invocation_id) id: vec3u) { + recordMixLevels(id.x, getMixLevels(id.x)); + } - const storageBuffer = t.createBufferTracked({ - size: 4 * (kMipGradientSteps + 1), - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); + @vertex fn vsRecord(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { + return VSOutput(getPosition(vNdx), iNdx, getMixLevels(iNdx)); + } - const resultBuffer = t.createBufferTracked({ - size: storageBuffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); + @fragment fn fsSaveVs(v: VSOutput) -> @location(0) vec4f { + recordMixLevels(v.ndx, v.result); + return vec4f(0); + } + `, + }); - const bindGroup = device.createBindGroup({ + const texture = t.createTextureTracked({ + size: [2, 2, 1], + format: 'r8unorm', + usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, + mipLevelCount: 2, + }); + + device.queue.writeTexture( + { texture, mipLevel: 1 }, + new Uint8Array([255]), + { bytesPerRow: 1 }, + [1, 1] + ); + + const sampler = device.createSampler({ + minFilter: 'linear', + magFilter: 'linear', + mipmapFilter: 'linear', + }); + + 
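Mip level 0 of the 2x2 r8unorm texture is left zero-initialized while mip level 1 is written to 1.0, so the value sampled back is mix(0, 1, w) = w, i.e. the GPU's own mix weight. Each step records one `textureSampleLevel` weight and one `textureSampleGrad` weight back to back, so the readback interleaves the two series; a minimal sketch of how the `unzip` helper defined earlier in this file separates them, using a hypothetical 3-step result:

// Readback layout per step i: [levelWeight_i, gradWeight_i]; kNumWeightTypes === 2.
function unzip<T>(array: T[], num: number): T[][] {
  const arrays: T[][] = Array.from({ length: num }, () => []);
  array.forEach((v, i) => arrays[i % num].push(v));
  return arrays;
}

const readback = [0, 0, 0.5, 0.47, 1, 1]; // hypothetical values for 3 steps
const [sampleLevelWeights, gradWeights] = unzip(readback, 2);
// sampleLevelWeights -> [0, 0.5, 1]
// gradWeights        -> [0, 0.47, 1]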
const target = t.createTextureTracked({ + size: [1, 1], + format: 'rgba8unorm', + usage: GPUTextureUsage.RENDER_ATTACHMENT, + }); + + const storageBuffer = t.createBufferTracked({ + size: 4 * (kMipGradientSteps + 1) * kNumWeightTypes, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const resultBuffer = t.createBufferTracked({ + size: storageBuffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); + + const createBindGroup = (pipeline: GPUComputePipeline | GPURenderPipeline) => + device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), entries: [ { binding: 0, resource: texture.createView() }, @@ -185,79 +569,268 @@ async function initMipGradientValuesForDevice(t: GPUTest) { ], }); - const encoder = device.createCommandEncoder(); - const pass = encoder.beginComputePass(); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(kMipGradientSteps + 1); - pass.end(); - encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); - device.queue.submit([encoder.finish()]); + const encoder = device.createCommandEncoder(); + switch (stage) { + case 'compute': { + const pipeline = device.createComputePipeline({ + layout: 'auto', + compute: { module }, + }); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.dispatchWorkgroups(kMipGradientSteps + 1); + pass.end(); + break; + } + case 'fragment': { + const pipeline = device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vs' }, + fragment: { module, entryPoint: 'fsRecord', targets: [{ format: 'rgba8unorm' }] }, + }); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: target.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.draw(3, kMipGradientSteps + 1); + pass.end(); + break; + } + case 'vertex': { + const pipeline = device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vsRecord' }, + fragment: { module, entryPoint: 'fsSaveVs', targets: [{ format: 'rgba8unorm' }] }, + }); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: target.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.draw(3, kMipGradientSteps + 1); + pass.end(); + break; + } + } + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); + device.queue.submit([encoder.finish()]); - await resultBuffer.mapAsync(GPUMapMode.READ); - const weights = Array.from(new Float32Array(resultBuffer.getMappedRange())); - resultBuffer.unmap(); + await resultBuffer.mapAsync(GPUMapMode.READ); + const result = Array.from(new Float32Array(resultBuffer.getMappedRange())); + resultBuffer.unmap(); + resultBuffer.destroy(); - texture.destroy(); - storageBuffer.destroy(); - resultBuffer.destroy(); + const [sampleLevelWeights, gradWeights] = unzip(result, kNumWeightTypes); + + validateWeights(stage, sampleLevelWeights); + validateWeights(stage, gradWeights); + + texture.destroy(); + storageBuffer.destroy(); + + return { + sampleLevelWeights, + softwareMixToGPUMixGradWeights: generateSoftwareMixToGPUMixGradWeights(gradWeights, texture), + }; +} - // Validate the weights - assert(weights[0] === 0); - assert(weights[kMipGradientSteps] === 1); - assert(weights[kMipGradientSteps / 2] === 0.5); - - // 
Note: for 16 steps, these are the AMD weights - // - // standard - // step mipLevel gpu AMD - // ---- -------- -------- ---------- - // 0: 0 0 1 - // 1: 0.0625 0.0625 0 - // 2: 0.125 0.125 0.03125 - // 3: 0.1875 0.1875 0.109375 - // 4: 0.25 0.25 0.1875 - // 5: 0.3125 0.3125 0.265625 - // 6: 0.375 0.375 0.34375 - // 7: 0.4375 0.4375 0.421875 - // 8: 0.5 0.5 0.5 - // 9: 0.5625 0.5625 0.578125 - // 10: 0.625 0.625 0.65625 - // 11: 0.6875 0.6875 0.734375 - // 12: 0.75 0.75 0.8125 - // 13: 0.8125 0.8125 0.890625 - // 14: 0.875 0.875 0.96875 - // 15: 0.9375 0.9375 1 - // 16: 1 1 1 - // - // notice step 1 is 0 and step 15 is 1. - // so we only check the 1 through 14. - for (let i = 1; i < kMipGradientSteps - 1; ++i) { - assert(weights[i] < weights[i + 1]); +// Given an array of ascending values and a value v, finds +// which indices in the array v is between. Returns the lower +// index and the mix weight between the 2 indices for v. +// +// In other words, if values = [10, 20, 30, 40, 50] +// +// getIndexAndWeight(values, 38) -> [2, 0.8] +// +// Example: +// +// values = [10, 20, 30, 40, 50] +// v = 38 +// [ndx, weight] = getIndexAndWeight(values, v); +// v2 = lerp(values[ndx], values[ndx + 1], weight); +// assert(v === v2) +function getIndexAndWeight(values: readonly number[], v: number) { + assert(v >= values[0] && v <= values[values.length - 1]); + let lo = 0; + let hi = values.length - 1; + for (;;) { + const i = (lo + (hi - lo) / 2) | 0; + const w0 = values[i]; + const w1 = values[i + 1]; + if (lo === hi || (v >= w0 && v <= w1)) { + const weight = (v - w0) / (w1 - w0); + return [i, weight]; } + if (v < w0) { + hi = i; + } else { + lo = i + 1; + } + } +} + +/** + * Given a fractional number between 0 and values.length returns the value between + * 2 values. Effectively lerp(values[ndx], values[ndx + 1], weight) + */ +function bilinearFilter(values: readonly number[], ndx: number, weight: number) { + const v0 = values[ndx]; + const v1 = values[ndx + 1] ?? 0; + assert(ndx < values.length - 1 || (ndx === values.length - 1 && weight === 0)); + return lerp(v0, v1, weight); +} + +/** + * Generates an array of values that maps between the software renderer's gradient computed + * mip level and the GPUs gradient computed mip level for mip level 0 to 1. + */ +function generateSoftwareMixToGPUMixGradWeights(gpuWeights: number[], texture: GPUTexture) { + const numSteps = gpuWeights.length - 1; + const size = [texture.width, texture.height, texture.depthOrArrayLayers]; + const softwareWeights = range(numSteps + 1, i => { + // u goes from 0 to 1 + const u = i / numSteps; + const g = lerp(1, 2, u) / texture.width; + const mipLevel = computeMipLevelFromGradients([g], [0], size); + assert(mipLevel >= 0 && mipLevel <= 1); + return mipLevel; + }); + const softwareMixToGPUMixMap = range(numSteps + 1, i => { + const mix = i / numSteps; + const [ndx, weight] = getIndexAndWeight(softwareWeights, mix); + return bilinearFilter(gpuWeights, ndx, weight); + }); + return softwareMixToGPUMixMap; +} + +function mapSoftwareMipLevelToGPUMipLevel(t: GPUTest, stage: ShaderStage, mipLevel: number) { + const baseLevel = Math.floor(mipLevel); + const softwareMix = mipLevel - baseLevel; + const gpuMix = getMixWeightByTypeForMipLevel( + t, + stage, + 'softwareMixToGPUMixGradWeights', + softwareMix + ); + return baseLevel + gpuMix; +} + +const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m; - s_deviceToMipGradientValues.set(device, weights); +/** + * Gets the mip gradient values for the current device. 
+ * The issue is, different GPUs have different ways of mixing between mip levels. + * For most GPUs it's linear but for AMD GPUs on Mac in particular, it's something + * else (which AFAICT is against all the specs). + * + * We seemingly have 3 options: + * + * 1. Increase the tolerances of tests so they pass on AMD. + * 2. Mark AMD as failing + * 3. Try to figure out how the GPU converts mip levels into weights + * + * We're doing 3. + * + * There's an assumption that the gradient will be the same for all formats + * and usages. + * + * Note: The code below has 2 maps. One device->Promise, the other device->weights + * device->weights is meant to be used synchronously by other code so we don't + * want to leave initMipGradientValuesForDevice until the weights have been read. + * But, multiple subcases will run because this function is async. So, subcase 1 + * runs, hits this init code, this code waits for the weights. Then, subcase 2 + * runs and hits this init code. The weights will not be in the device->weights map + * yet which is why we have the device->Promise map. This is so subcase 2 waits + * for subcase 1's "query the weights" step. Otherwise, all subcases would do the + * "get the weights" step separately. + */ +const kMipGradientSteps = 64; +const s_deviceToMipGradientValuesPromise = new WeakMap< + GPUDevice, + Record> +>(); +const s_deviceToMipGradientValues = new WeakMap>(); + +async function initMipGradientValuesForDevice(t: GPUTest, stage: ShaderStage) { + const { device } = t; + // Get the per stage promises (or make them) + const stageWeightsP = + s_deviceToMipGradientValuesPromise.get(device) ?? + ({} as Record>); + s_deviceToMipGradientValuesPromise.set(device, stageWeightsP); + + let weightsP = stageWeightsP[stage]; + if (!weightsP) { + // There was no promise for this weight so request it + // and add a then clause so the first thing that will happen + // when the promise resolves is that we'll record the weights for + // that stage. + weightsP = queryMipGradientValuesForDevice(t, stage); + weightsP + .then(weights => { + const stageWeights = + s_deviceToMipGradientValues.get(device) ?? 
({} as Record); + s_deviceToMipGradientValues.set(device, stageWeights); + stageWeights[stage] = weights; + }) + .catch(e => { + throw e; + }); + stageWeightsP[stage] = weightsP; } + return await weightsP; } -function getWeightForMipLevel(t: GPUTest, mipLevelCount: number, mipLevel: number) { - if (mipLevel < 0 || mipLevel >= mipLevelCount) { - return 1; +function getMixWeightByTypeForMipLevel( + t: GPUTest, + stage: ShaderStage, + weightType: MipWeightType | 'identity', + mipLevel: number +) { + if (weightType === 'identity') { + return euclideanModulo(mipLevel, 1); } // linear interpolate between weights - const weights = s_deviceToMipGradientValues.get(t.device); + const weights = s_deviceToMipGradientValues.get(t.device)![stage][weightType]; assert( !!weights, 'you must use WGSLTextureSampleTest or call initializeDeviceMipWeights before calling this function' ); const steps = weights.length - 1; - const w = (mipLevel % 1) * steps; + const w = euclideanModulo(mipLevel, 1) * steps; const lowerNdx = Math.floor(w); const upperNdx = Math.ceil(w); const mix = w % 1; return lerp(weights[lowerNdx], weights[upperNdx], mix); } +function getWeightForMipLevel( + t: GPUTest, + stage: ShaderStage, + weightType: MipWeightType | 'identity', + mipLevelCount: number, + mipLevel: number +) { + if (mipLevel < 0 || mipLevel >= mipLevelCount) { + return 1; + } + return getMixWeightByTypeForMipLevel(t, stage, weightType, mipLevel); +} + /** * Used for textureDimension, textureNumLevels, textureNumLayers */ @@ -305,7 +878,6 @@ export class WGSLTextureQueryTest extends GPUTest { export class WGSLTextureSampleTest extends GPUTest { override async init(): Promise { await super.init(); - await initMipGradientValuesForDevice(this); } } @@ -329,17 +901,16 @@ function getLimitValue(v: number) { } } -function getValueBetweenMinAndMaxTexelValueInclusive( +function getMinAndMaxTexelValueForComponent( rep: TexelRepresentationInfo, - component: TexelComponent, - normalized: number + component: TexelComponent ) { assert(!!rep.numericRange); const perComponentRanges = rep.numericRange as PerComponentNumericRange; const perComponentRange = perComponentRanges[component]; const range = rep.numericRange as NumericRange; const { min, max } = perComponentRange ? perComponentRange : range; - return lerp(getLimitValue(min), getLimitValue(max), normalized); + return { min: getLimitValue(min), max: getLimitValue(max) }; } /** @@ -397,16 +968,72 @@ export function appendComponentTypeForFormatToTextureType(base: string, format: : `${base}<${getTextureFormatTypeInfo(format).componentType}>`; } +type RandomTextureOptions = { + generator: PerPixelAtLevel>; +}; + /** - * Creates a TexelView filled with random values. + * Make a generator for texels for depth comparison tests. */ -export function createRandomTexelView(info: { - format: GPUTextureFormat; - size: GPUExtent3D; - mipLevel: number; -}): TexelView { +export function makeRandomDepthComparisonTexelGenerator( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + }, + comparison: GPUCompareFunction +) { const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; const size = reifyExtent3D(info.size); + + const comparisonIsEqualOrNotEqual = comparison === 'equal' || comparison === 'not-equal'; + + // for equal and not-equal we just want to test 0, 0.6, and 1 + // for everything else we want 0 to 1 + // Note: 0.6 is chosen because we'll never choose 0.6 as our depth reference + // value. 
(see generateTextureBuiltinInputsImpl and generateSamplePointsCube) + // The problem with comparing equal is other than 0.0 and 1.0, no other + // values are guaranteed to be equal. + const fixedValues = [0, 0.6, 1, 1]; + const format = comparisonIsEqualOrNotEqual + ? (norm: number) => fixedValues[(norm * (fixedValues.length - 1)) | 0] + : (norm: number) => norm; + + return (coords: SampleCoord): Readonly> => { + const texel: PerTexelComponent = {}; + for (const component of rep.componentOrder) { + const rnd = hashU32( + coords.x, + coords.y, + coords.z, + coords.sampleIndex ?? 0, + component.charCodeAt(0), + size.width, + size.height, + size.depthOrArrayLayers + ); + const normalized = clamp(rnd / 0xffffffff, { min: 0, max: 1 }); + texel[component] = format(normalized); + } + return quantize(texel, rep); + }; +} + +function createRandomTexelViewViaColors( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + }, + options?: RandomTextureOptions | undefined +): TexelView { + const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; + const size = reifyExtent3D(info.size); + const minMax = Object.fromEntries( + rep.componentOrder.map(component => [ + component, + getMinAndMaxTexelValueForComponent(rep, component), + ]) + ); const generator = (coords: SampleCoord): Readonly> => { const texel: PerTexelComponent = {}; for (const component of rep.componentOrder) { @@ -422,30 +1049,125 @@ export function createRandomTexelView(info: { size.depthOrArrayLayers ); const normalized = clamp(rnd / 0xffffffff, { min: 0, max: 1 }); - texel[component] = getValueBetweenMinAndMaxTexelValueInclusive(rep, component, normalized); + const { min, max } = minMax[component]; + texel[component] = lerp(min, max, normalized); } return quantize(texel, rep); }; - return TexelView.fromTexelsAsColors(info.format as EncodableTextureFormat, generator); + return TexelView.fromTexelsAsColors( + info.format as EncodableTextureFormat, + options?.generator ?? generator + ); +} + +function createRandomTexelViewViaBytes(info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + sampleCount: number; +}): TexelView { + const { format } = info; + const formatInfo = kTextureFormatInfo[format]; + const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; + assert(!!rep); + const bytesPerBlock = (formatInfo.color?.bytes ?? formatInfo.stencil?.bytes)!; + assert(bytesPerBlock > 0); + const size = physicalMipSize(reifyExtent3D(info.size), info.format, '2d', 0); + const blocksAcross = Math.ceil(size.width / formatInfo.blockWidth); + const blocksDown = Math.ceil(size.height / formatInfo.blockHeight); + const bytesPerRow = blocksAcross * bytesPerBlock * info.sampleCount; + const bytesNeeded = bytesPerRow * blocksDown * size.depthOrArrayLayers; + const data = new Uint8Array(bytesNeeded); + + const hashBase = + sumOfCharCodesOfString(info.format) + + size.width + + size.height + + size.depthOrArrayLayers + + info.mipLevel + + info.sampleCount; + + if (info.format.includes('32float') || info.format.includes('16float')) { + const { min, max } = getMinAndMaxTexelValueForComponent(rep, TexelComponent.R); + const asFloat = info.format.includes('32float') + ? 
new Float32Array(data.buffer) + : new Float16Array(data.buffer); + for (let i = 0; i < asFloat.length; ++i) { + asFloat[i] = lerp(min, max, hashU32(hashBase + i) / 0xffff_ffff); + } + } else if (bytesNeeded % 4 === 0) { + const asU32 = new Uint32Array(data.buffer); + for (let i = 0; i < asU32.length; ++i) { + asU32[i] = hashU32(hashBase + i); + } + } else { + for (let i = 0; i < bytesNeeded; ++i) { + data[i] = hashU32(hashBase + i); + } + } + + return TexelView.fromTextureDataByReference(info.format as EncodableTextureFormat, data, { + bytesPerRow, + rowsPerImage: size.height, + subrectOrigin: [0, 0, 0], + subrectSize: size, + }); +} + +/** + * Creates a TexelView filled with random values. + */ +function createRandomTexelView( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + sampleCount: number; + }, + options?: RandomTextureOptions | undefined +): TexelView { + assert(!isCompressedTextureFormat(info.format)); + const formatInfo = kTextureFormatInfo[info.format]; + const type = formatInfo.color?.type ?? formatInfo.depth?.type ?? formatInfo.stencil?.type; + const canFillWithRandomTypedData = + !options && + isEncodableTextureFormat(info.format) && + ((info.format.includes('norm') && type !== 'depth') || + info.format.includes('16float') || + (info.format.includes('32float') && type !== 'depth') || + type === 'sint' || + type === 'uint'); + + return canFillWithRandomTypedData + ? createRandomTexelViewViaBytes(info) + : createRandomTexelViewViaColors(info, options); } /** * Creates a mip chain of TexelViews filled with random values */ -export function createRandomTexelViewMipmap(info: { - format: GPUTextureFormat; - size: GPUExtent3D; - mipLevelCount?: number; - dimension?: GPUTextureDimension; -}): TexelView[] { +function createRandomTexelViewMipmap( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevelCount?: number; + dimension?: GPUTextureDimension; + sampleCount?: number; + }, + options?: RandomTextureOptions | undefined +): TexelView[] { const mipLevelCount = info.mipLevelCount ?? 1; const dimension = info.dimension ?? '2d'; return range(mipLevelCount, i => - createRandomTexelView({ - format: info.format, - size: virtualMipSize(dimension, info.size, i), - mipLevel: i, - }) + createRandomTexelView( + { + format: info.format, + size: virtualMipSize(dimension, info.size, i), + mipLevel: i, + sampleCount: info.sampleCount ?? 1, + }, + options + ) ); } @@ -457,33 +1179,70 @@ export type Dimensionality = vec1 | vec2 | vec3; type TextureCallArgKeys = keyof TextureCallArgs; const kTextureCallArgNames: readonly TextureCallArgKeys[] = [ + 'component', 'coords', + 'derivativeMult', // NOTE: derivativeMult not an argument but is used with coords for implicit derivatives. 
'arrayIndex', + 'bias', 'sampleIndex', 'mipLevel', 'ddx', 'ddy', + 'depthRef', 'offset', ] as const; export interface TextureCallArgs { - coords?: T; + component?: number; // Used by textureGather + coords?: T; // The coord passed + derivativeMult?: T; mipLevel?: number; arrayIndex?: number; + bias?: number; sampleIndex?: number; + depthRef?: number; ddx?: T; ddy?: T; offset?: T; } +export type TextureBuiltin = + | 'textureGather' + | 'textureGatherCompare' + | 'textureLoad' + | 'textureSample' + | 'textureSampleBaseClampToEdge' + | 'textureSampleBias' + | 'textureSampleCompare' + | 'textureSampleCompareLevel' + | 'textureSampleGrad' + | 'textureSampleLevel'; + export interface TextureCall extends TextureCallArgs { - builtin: 'textureLoad' | 'textureSample' | 'textureSampleBaseClampToEdge' | 'textureSampleLevel'; + builtin: TextureBuiltin; coordType: 'f' | 'i' | 'u'; levelType?: 'i' | 'u' | 'f'; arrayIndexType?: 'i' | 'u'; sampleIndexType?: 'i' | 'u'; + componentType?: 'i' | 'u'; } +const isBuiltinComparison = (builtin: TextureBuiltin) => + builtin === 'textureGatherCompare' || + builtin === 'textureSampleCompare' || + builtin === 'textureSampleCompareLevel'; +const isBuiltinGather = (builtin: TextureBuiltin | undefined) => + builtin === 'textureGather' || builtin === 'textureGatherCompare'; +const builtinNeedsSampler = (builtin: TextureBuiltin) => + builtin.startsWith('textureSample') || builtin.startsWith('textureGather'); +const builtinNeedsDerivatives = (builtin: TextureBuiltin) => + builtin === 'textureSample' || + builtin === 'textureSampleBias' || + builtin === 'textureSampleCompare'; + +const isCubeViewDimension = (viewDescriptor?: GPUTextureViewDescriptor) => + viewDescriptor?.dimension === 'cube' || viewDescriptor?.dimension === 'cube-array'; + const s_u32 = new Uint32Array(1); const s_f32 = new Float32Array(s_u32.buffer); const s_i32 = new Int32Array(s_u32.buffer); @@ -511,7 +1270,11 @@ function getCallArgType( ) { switch (argName) { case 'coords': + case 'derivativeMult': return call.coordType; + case 'component': + assert(call.componentType !== undefined); + return call.componentType; case 'mipLevel': assert(call.levelType !== undefined); return call.levelType; @@ -521,6 +1284,8 @@ function getCallArgType( case 'sampleIndex': assert(call.sampleIndexType !== undefined); return call.sampleIndexType; + case 'bias': + case 'depthRef': case 'ddx': case 'ddy': return 'f'; @@ -609,6 +1374,35 @@ function convertPerTexelComponentToResultFormat( return out; } +/** + * Convert RGBA result format to texel view format. + * Example, converts + * { R: 0.1, G: 0, B: 0, A: 1 } to { Depth: 0.1 } + * { R: 0.1 } to { R: 0.1, G: 0, B: 0, A: 1 } + */ +function convertToTexelViewFormat(src: PerTexelComponent, format: GPUTextureFormat) { + const componentOrder = isDepthTextureFormat(format) + ? [TexelComponent.Depth] + : isStencilTextureFormat(format) + ? [TexelComponent.Stencil] + : [TexelComponent.R, TexelComponent.G, TexelComponent.B, TexelComponent.A]; + const out: PerTexelComponent = {}; + for (const component of componentOrder) { + let v = src[component]; + if (v === undefined) { + if (component === 'Depth' || component === 'Stencil') { + v = src.R; + } else if (component === 'G' || component === 'B') { + v = 0; + } else { + v = 1; + } + } + out[component] = v; + } + return out; +} + /** * Convert RGBA result format to texel view format of src texture. 
* Effectively this converts something like { R: 0.1, G: 0, B: 0, A: 1 } @@ -631,14 +1425,45 @@ function zeroValuePerTexelComponent(components: TexelComponent[]) { for (const component of components) { out[component] = 0; } - return out; + return out; +} + +const kSamplerFns: Record boolean> = { + never: (ref: number, v: number) => false, + less: (ref: number, v: number) => ref < v, + equal: (ref: number, v: number) => ref === v, + 'less-equal': (ref: number, v: number) => ref <= v, + greater: (ref: number, v: number) => ref > v, + 'not-equal': (ref: number, v: number) => ref !== v, + 'greater-equal': (ref: number, v: number) => ref >= v, + always: (ref: number, v: number) => true, +} as const; + +function applyCompare( + call: TextureCall, + sampler: GPUSamplerDescriptor | undefined, + components: TexelComponent[], + src: PerTexelComponent +): PerTexelComponent { + if (isBuiltinComparison(call.builtin)) { + assert(sampler !== undefined); + assert(call.depthRef !== undefined); + const out: PerTexelComponent = {}; + const compareFn = kSamplerFns[sampler.compare!]; + for (const component of components) { + out[component] = compareFn(call.depthRef, src[component]!) ? 1 : 0; + } + return out; + } else { + return src; + } } /** * Returns the expect value for a WGSL builtin texture function for a single * mip level */ -export function softwareTextureReadMipLevel( +function softwareTextureReadMipLevel( call: TextureCall, texture: Texture, sampler: GPUSamplerDescriptor | undefined, @@ -661,10 +1486,7 @@ export function softwareTextureReadMipLevel( sampler?.addressModeW ?? 'clamp-to-edge', ]; - const isCube = - texture.viewDescriptor.dimension === 'cube' || - texture.viewDescriptor.dimension === 'cube-array'; - + const isCube = isCubeViewDimension(texture.viewDescriptor); const arrayIndexMult = isCube ? 6 : 1; const numLayers = textureSize[2] / arrayIndexMult; assert(numLayers % 1 === 0); @@ -684,8 +1506,14 @@ export function softwareTextureReadMipLevel( }; switch (call.builtin) { + case 'textureGather': + case 'textureGatherCompare': case 'textureSample': + case 'textureSampleBias': case 'textureSampleBaseClampToEdge': + case 'textureSampleCompare': + case 'textureSampleCompareLevel': + case 'textureSampleGrad': case 'textureSampleLevel': { let coords = toArray(call.coords!); @@ -714,7 +1542,7 @@ export function softwareTextureReadMipLevel( const samples: { at: number[]; weight: number }[] = []; - const filter = sampler?.minFilter ?? 'nearest'; + const filter = isBuiltinGather(call.builtin) ? 'linear' : sampler?.minFilter ?? 'nearest'; switch (filter) { case 'linear': { // 'p0' is the lower texel for 'at' @@ -733,10 +1561,11 @@ export function softwareTextureReadMipLevel( samples.push({ at: p1, weight: p1W[0] }); break; case 2: { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); break; } case 3: { @@ -746,10 +1575,11 @@ export function softwareTextureReadMipLevel( // the slice they'll be wrapped by wrapFaceCoordToCubeFaceAtEdgeBoundaries // below. 
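The four samples above are pushed in `textureGather` order: (p0.x, p1.y), (p1.x, p1.y), (p1.x, p0.y), (p0.x, p0.y). For reference, a minimal sketch of the standard bilinear weights those samples carry, where `tx` and `ty` stand for the fractional weights toward p1 (p1W in the code above); this is an assumed restatement for illustration, not CTS code:

function bilinearWeightsInGatherOrder(tx: number, ty: number) {
  const w00 = (1 - tx) * (1 - ty); // texel at (p0.x, p0.y)
  const w10 = tx * (1 - ty);       // texel at (p1.x, p0.y)
  const w01 = (1 - tx) * ty;       // texel at (p0.x, p1.y)
  const w11 = tx * ty;             // texel at (p1.x, p1.y)
  // Same order the samples are pushed above; the four weights always sum to 1.
  return [w01, w11, w10, w00];
}

bilinearWeightsInGatherOrder(0.25, 0.5); // [0.375, 0.125, 0.125, 0.375]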
if (isCube) { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1], p0[2]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); const ndx = getUnusedCubeCornerSampleIndex(textureSize[0], coords as vec3); if (ndx >= 0) { // # Issues with corners of cubemaps @@ -783,7 +1613,16 @@ export function softwareTextureReadMipLevel( // I'm not sure what "average the values of the three available samples" // means. To me that would be (a+b+c)/3 or in other words, set all the // weights to 0.33333 but that's not what the M1 is doing. - unreachable('corners of cubemaps are not testable'); + // + // We could check that, given the 3 texels at the corner, if all 3 texels + // are the same value then the result must be the same value. Otherwise, + // the result must be between the 3 values. For now, the code that + // chooses test coordinates avoids corners. This has the restriction + // that the smallest mip level be at least 4x4 so there are some non + // corners to choose from. + unreachable( + `corners of cubemaps are not testable:\n ${describeTextureCall(call)}` + ); } } else { const p = [p0, p1]; @@ -813,16 +1652,33 @@ export function softwareTextureReadMipLevel( unreachable(); } + if (isBuiltinGather(call.builtin)) { + const componentNdx = call.component ?? 0; + assert(componentNdx >= 0 && componentNdx < 4); + assert(samples.length === 4); + const component = kRGBAComponents[componentNdx]; + const out: PerTexelComponent = {}; + samples.forEach((sample, i) => { + const c = isCube + ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) + : applyAddressModesToCoords(addressMode, textureSize, sample.at); + const v = load(c); + const postV = applyCompare(call, sampler, rep.componentOrder, v); + const rgba = convertPerTexelComponentToResultFormat(postV, format); + out[kRGBAComponents[i]] = rgba[component]; + }); + return out; + } + const out: PerTexelComponent = {}; - const ss = []; for (const sample of samples) { const c = isCube ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) : applyAddressModesToCoords(addressMode, textureSize, sample.at); const v = load(c); - ss.push(v); + const postV = applyCompare(call, sampler, rep.componentOrder, v); for (const component of rep.componentOrder) { - out[component] = (out[component] ?? 0) + v[component]! * sample.weight; + out[component] = (out[component] ?? 0) + postV[component]! * sample.weight; } } @@ -842,8 +1698,9 @@ export function softwareTextureReadMipLevel( /** * Reads a texture, optionally sampling between 2 mipLevels */ -export function softwareTextureReadLevel( +function softwareTextureReadLevel( t: GPUTest, + stage: ShaderStage, call: TextureCall, texture: Texture, sampler: GPUSamplerDescriptor | undefined, @@ -856,14 +1713,17 @@ export function softwareTextureReadLevel( return softwareTextureReadMipLevel(call, texture, sampler, mipLevel); } - switch (sampler.mipmapFilter) { + const effectiveMipmapFilter = isBuiltinGather(call.builtin) ? 
'nearest' : sampler.mipmapFilter; + switch (effectiveMipmapFilter) { case 'linear': { const clampedMipLevel = clamp(mipLevel, { min: 0, max: maxLevel }); const baseMipLevel = Math.floor(clampedMipLevel); const nextMipLevel = Math.ceil(clampedMipLevel); const t0 = softwareTextureReadMipLevel(call, texture, sampler, baseMipLevel); const t1 = softwareTextureReadMipLevel(call, texture, sampler, nextMipLevel); - const mix = getWeightForMipLevel(t, mipLevelCount, mipLevel); + const weightType = call.builtin === 'textureSampleLevel' ? 'sampleLevelWeights' : 'identity'; + const mix = getWeightForMipLevel(t, stage, weightType, mipLevelCount, clampedMipLevel); + assert(mix >= 0 && mix <= 1); const values = [ { v: t0, weight: 1 - mix }, { v: t1, weight: mix }, @@ -885,45 +1745,153 @@ export function softwareTextureReadLevel( } } +function computeMipLevelFromGradients( + ddx: readonly number[], + ddy: readonly number[], + size: GPUExtent3D +) { + const texSize = reifyExtent3D(size); + const textureSize = [texSize.width, texSize.height, texSize.depthOrArrayLayers]; + + // Compute the mip level the same way textureSampleGrad does according to the spec. + const scaledDdx = ddx.map((v, i) => v * textureSize[i]); + const scaledDdy = ddy.map((v, i) => v * textureSize[i]); + const dotDDX = dotProduct(scaledDdx, scaledDdx); + const dotDDY = dotProduct(scaledDdy, scaledDdy); + const deltaMax = Math.max(dotDDX, dotDDY); + const mipLevel = 0.5 * Math.log2(deltaMax); + return mipLevel; +} + +function computeMipLevelFromGradientsForCall( + call: TextureCall, + size: GPUExtent3D +) { + assert(!!call.ddx); + assert(!!call.ddy); + // ddx and ddy are the values that would be passed to textureSampleGrad + // If we're emulating textureSample then they're the computed derivatives + // such that if we passed them to textureSampleGrad they'd produce the + // same result. + const ddx: readonly number[] = typeof call.ddx === 'number' ? [call.ddx] : call.ddx; + const ddy: readonly number[] = typeof call.ddy === 'number' ? [call.ddy] : call.ddy; + + return computeMipLevelFromGradients(ddx, ddy, size); +} + /** - * The software version of a texture builtin (eg: textureSample) - * Note that this is not a complete implementation. Rather it's only - * what's needed to generate the correct expected value for the tests. + * The software version of textureSampleGrad except with optional level. */ -export function softwareTextureRead( +function softwareTextureReadGrad( t: GPUTest, + stage: ShaderStage, call: TextureCall, texture: Texture, - sampler: GPUSamplerDescriptor + sampler?: GPUSamplerDescriptor ): PerTexelComponent { - assert(call.ddx !== undefined); - assert(call.ddy !== undefined); + const bias = call.bias === undefined ? 0 : clamp(call.bias, { min: -16.0, max: 15.99 }); + if (call.ddx) { + const mipLevel = computeMipLevelFromGradientsForCall(call, texture.descriptor.size); + const mipLevelCount = texture.descriptor.mipLevelCount ?? 1; + const clampedMipLevel = clamp(mipLevel + bias, { min: 0, max: mipLevelCount - 1 }); + const weightMipLevel = mapSoftwareMipLevelToGPUMipLevel(t, stage, clampedMipLevel); + return softwareTextureReadLevel(t, stage, call, texture, sampler, weightMipLevel); + } else { + return softwareTextureReadLevel(t, stage, call, texture, sampler, (call.mipLevel ?? 
0) + bias); + } +} + +/** + * This must match the code in doTextureCalls for derivativeBase + * + * Note: normal implicit derivatives are computed like this + * + * fn textureSample(T, S, coord) -> vec4f { + * return textureSampleGrad(T, S, dpdx(coord), dpdy(coord)); + * } + * + * dpdx and dpdy are effectively computed by, + * getting the values of coord for 2x2 adjacent texels. + * + * p0 = coord value at x, y + * p1 = coord value at x + 1, y + * p2 = coord value at x, y + 1 + * p3 = coord value at x + 1, y + 1 + * + * dpdx is the average delta in x and dpdy is the average delta in y + * + * dpdx = (p1 - p0 + p3 - p2) / 2 // average of horizontal change + * dpdy = (p2 - p0 + p3 - p1) / 2 // average of vertical change + * + * derivativeBase is + * + * '1d' '2d' '3d' + * p0 = [0] [0, 0] [0, 0, 0] + * p1 = [1] [1, 0] [1, 0, 0] + * p2 = [0] [0, 1] [0, 1, 0] + * p3 = [1] [1, 1] [1, 1, 0] + * + * But, these values are normalized texels coords so if the src texture + * is 8x8 these would be * 0.125 + * + * Note: to test other derivatives we add in a multiplier but, + * this base gives us something to add that starts at 0,0 at the call + * but who's derivatives we can easily set. We need the default + * derivativeBase to be 1 otherwise it's 0 which makes the computed mip level + * be -Infinity which means bias in `textureSampleBias` has no meaning. + */ +function derivativeBaseForCall(texture: Texture, isDDX: boolean) { const texSize = reifyExtent3D(texture.descriptor.size); - const textureSize = [texSize.width, texSize.height]; + const textureSize = [texSize.width, texSize.height, texSize.depthOrArrayLayers]; + if (isCubeViewDimension(texture.viewDescriptor)) { + return (isDDX ? [1 / textureSize[0], 0, 1] : [0, 1 / textureSize[1], 1]) as T; + } else if (texture.descriptor.dimension === '3d') { + return (isDDX ? [1 / textureSize[0], 0, 0] : [0, 1 / textureSize[1], 0]) as T; + } else if (texture.descriptor.dimension === '1d') { + return [1 / textureSize[0]] as T; + } else { + return (isDDX ? [1 / textureSize[0], 0] : [0, 1 / textureSize[1]]) as T; + } +} - // ddx and ddy are the values that would be passed to textureSampleGrad - // If we're emulating textureSample then they're the computed derivatives - // such that if we passed them to textureSampleGrad they'd produce the - // same result. - const ddx: readonly number[] = typeof call.ddx === 'number' ? [call.ddx] : call.ddx; - const ddy: readonly number[] = typeof call.ddy === 'number' ? [call.ddy] : call.ddy; +/** + * Multiplies derivativeBase by derivativeMult or 1 + */ +function derivativeForCall( + texture: Texture, + call: TextureCall, + isDDX: boolean +) { + const dd = derivativeBaseForCall(texture, isDDX); + return dd.map((v, i) => v * (call.derivativeMult?.[i] ?? 1)) as T; +} - // Compute the mip level the same way textureSampleGrad does - const scaledDdx = ddx.map((v, i) => v * textureSize[i]); - const scaledDdy = ddy.map((v, i) => v * textureSize[i]); - const dotDDX = dotProduct(scaledDdx, scaledDdx); - const dotDDY = dotProduct(scaledDdy, scaledDdy); - const deltaMax = Math.max(dotDDX, dotDDY); - // MAINTENANCE_TODO: handle texture view baseMipLevel and mipLevelCount? 
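A small worked example of the scheme described above, reusing the formula from `computeMipLevelFromGradients` and assuming an 8x8 2d texture with the derivativeBase shown (ddx = [1/8, 0], ddy = [0, 1/8]):

// mipLevel = 0.5 * log2(max(dot(ddx * size), dot(ddy * size))), per the WGSL spec.
const size = [8, 8];
const base = { ddx: [1 / 8, 0], ddy: [0, 1 / 8] }; // derivativeBase for a 2d texture

function mipLevelFor(derivativeMult: number): number {
  const scale = (v: number[]) => v.map((c, i) => c * derivativeMult * size[i]);
  const dot = (a: number[]) => a.reduce((sum, c) => sum + c * c, 0);
  const deltaMax = Math.max(dot(scale(base.ddx)), dot(scale(base.ddy)));
  return 0.5 * Math.log2(deltaMax);
}

mipLevelFor(1); // 0 -> the default derivativeMult of 1 samples the base mip level
mipLevelFor(2); // 1 -> doubling the derivatives selects the next smaller mip level
mipLevelFor(4); // 2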
- const mipLevel = 0.5 * Math.log2(deltaMax); - return softwareTextureReadLevel(t, call, texture, sampler, mipLevel); +function softwareTextureRead( + t: GPUTest, + stage: ShaderStage, + call: TextureCall, + texture: Texture, + sampler?: GPUSamplerDescriptor +): PerTexelComponent { + // add the implicit derivatives that we use from WGSL in doTextureCalls + if (builtinNeedsDerivatives(call.builtin) && !call.ddx) { + const newCall: TextureCall = { + ...call, + ddx: call.ddx ?? derivativeForCall(texture, call, true), + ddy: call.ddy ?? derivativeForCall(texture, call, false), + }; + call = newCall; + } + return softwareTextureReadGrad(t, stage, call, texture, sampler); } -export type TextureTestOptions = { +export type TextureTestOptions = { ddx?: number; // the derivative we want at sample time ddy?: number; - uvwStart?: readonly [number, number]; // the starting uv value (these are used make the coordinates negative as it uncovered issues on some hardware) - offset?: readonly [number, number]; // a constant offset + uvwStart?: Readonly; // the starting uv value (these are used make the coordinates negative as it uncovered issues on some hardware) + offset?: Readonly; // a constant offset + depthTexture?: boolean; + arrayIndexType?: 'i' | 'u'; }; /** @@ -1090,6 +2058,50 @@ function texelsApproximatelyEqual( return true; } +// If it's `textureGather` then we need to convert all values to one component. +// In other words, imagine the format is rg11b10ufloat. If it was +// `textureSample` we'd have `r11, g11, b10, a=1` but for `textureGather` +// +// component = 0 => `r11, r11, r11, r11` +// component = 1 => `g11, g11, g11, g11` +// component = 2 => `b10, b10, b10, b10` +// +// etc..., each from a different texel +// +// The Texel utils don't handle this. So if `component = 2` we take each value, +// copy it to the `B` component, run it through the texel utils so it returns +// the correct ULP for a 10bit float (not an 11 bit float). Then copy it back to +// the channel it came from. +function getULPFromZeroForComponents( + rgba: PerTexelComponent, + format: EncodableTextureFormat, + builtin: TextureBuiltin, + componentNdx?: number +): PerTexelComponent { + const rep = kTexelRepresentationInfo[format]; + if (isBuiltinGather(builtin)) { + const out: PerTexelComponent = {}; + const component = kRGBAComponents[componentNdx ?? 0]; + const temp: PerTexelComponent = { R: 0, G: 0, B: 0, A: 1 }; + for (const comp of kRGBAComponents) { + temp[component] = rgba[comp]; + const texel = convertResultFormatToTexelViewFormat(temp, format); + const ulp = convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + out[comp] = ulp[component]; + } + return out; + } else { + const texel = convertResultFormatToTexelViewFormat(rgba, format); + return convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + } +} + /** * Checks the result of each call matches the expected result. */ @@ -1099,11 +2111,24 @@ export async function checkCallResults( textureType: string, sampler: GPUSamplerDescriptor | undefined, calls: TextureCall[], - results: PerTexelComponent[] + results: Awaited>>, + shortShaderStage: ShortShaderStage, + gpuTexture?: GPUTexture ) { + const stage = kShortShaderStageToShaderStage[shortShaderStage]; + await initMipGradientValuesForDevice(t, stage); + + let haveComparisonCheckInfo = false; + let checkInfo = { + runner: results.runner, + calls, + sampler, + }; + // These are only read if the tests fail. 
They are used to get the values from the + // GPU texture for displaying in diagnostics. + let gpuTexels: TexelView[] | undefined; const errs: string[] = []; const format = texture.texels[0].format; - const rep = kTexelRepresentationInfo[format]; const size = reifyExtent3D(texture.descriptor.size); const maxFractionalDiff = sampler?.minFilter === 'linear' || @@ -1112,10 +2137,20 @@ export async function checkCallResults( ? getMaxFractionalDiffForTextureFormat(texture.descriptor.format) : 0; - for (let callIdx = 0; callIdx < calls.length && errs.length === 0; callIdx++) { + for (let callIdx = 0; callIdx < calls.length; callIdx++) { const call = calls[callIdx]; - const gotRGBA = results[callIdx]; - const expectRGBA = softwareTextureReadLevel(t, call, texture, sampler, call.mipLevel ?? 0); + const gotRGBA = results.results[callIdx]; + const expectRGBA = softwareTextureRead(t, stage, call, texture, sampler); + + // The spec says depth and stencil have implementation defined values for G, B, and A + // so if this is `textureGather` and component > 0 then there's nothing to check. + if ( + isDepthOrStencilTextureFormat(format) && + isBuiltinGather(call.builtin) && + call.component! > 0 + ) { + continue; + } if (texelsApproximatelyEqual(gotRGBA, expectRGBA, format, maxFractionalDiff)) { continue; @@ -1125,237 +2160,235 @@ export async function checkCallResults( continue; } - const got = convertResultFormatToTexelViewFormat(gotRGBA, format); - const expect = convertResultFormatToTexelViewFormat(expectRGBA, format); - const gULP = rep.bitsToULPFromZero(rep.numberToBits(got)); - const eULP = rep.bitsToULPFromZero(rep.numberToBits(expect)); - for (const component of rep.componentOrder) { - const g = got[component]!; - const e = expect[component]!; + const gULP = getULPFromZeroForComponents(gotRGBA, format, call.builtin, call.component); + const eULP = getULPFromZeroForComponents(expectRGBA, format, call.builtin, call.component); + + // from the spec: https://gpuweb.github.io/gpuweb/#reading-depth-stencil + // depth and stencil values are D, ?, ?, ? + const rgbaComponentsToCheck = + isBuiltinGather(call.builtin) || !isDepthOrStencilTextureFormat(format) + ? kRGBAComponents + : kRComponent; + + let bad = false; + const diffs = rgbaComponentsToCheck.map(component => { + const g = gotRGBA[component]!; + const e = expectRGBA[component]!; const absDiff = Math.abs(g - e); const ulpDiff = Math.abs(gULP[component]! - eULP[component]!); - const relDiff = absDiff / Math.max(Math.abs(g), Math.abs(e)); + assert(!Number.isNaN(ulpDiff)); + const maxAbs = Math.max(Math.abs(g), Math.abs(e)); + const relDiff = maxAbs > 0 ? absDiff / maxAbs : 0; if (ulpDiff > 3 && absDiff > maxFractionalDiff) { - const desc = describeTextureCall(call); - errs.push(`component was not as expected: + bad = true; + } + return { absDiff, relDiff, ulpDiff }; + }); + + const isFloatType = (format: GPUTextureFormat) => { + const info = kTextureFormatInfo[format]; + return info.color?.type === 'float' || info.depth?.type === 'depth'; + }; + const fix5 = (n: number) => (isFloatType(format) ? n.toFixed(5) : n.toString()); + const fix5v = (arr: number[]) => arr.map(v => fix5(v)).join(', '); + const rgbaToArray = (p: PerTexelComponent): number[] => + rgbaComponentsToCheck.map(component => p[component]!); + + if (bad) { + const desc = describeTextureCall(call); + errs.push(`result was not as expected: size: [${size.width}, ${size.height}, ${size.depthOrArrayLayers}] mipCount: ${texture.descriptor.mipLevelCount ?? 
1} - call: ${desc} // #${callIdx} - component: ${component} - got: ${g} - expected: ${e} - abs diff: ${absDiff.toFixed(4)} - rel diff: ${(relDiff * 100).toFixed(2)}% - ulp diff: ${ulpDiff} + call: ${desc} // #${callIdx}`); + if (isCubeViewDimension(texture.viewDescriptor)) { + const coord = convertCubeCoordToNormalized3DTextureCoord(call.coords as vec3); + const faceNdx = Math.floor(coord[2] * 6); + errs.push(` : as 3D texture coord: (${coord[0]}, ${coord[1]}, ${coord[2]})`); + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? '2d', + texture.descriptor.size, + mipLevel + ); + const t = coord.slice(0, 2).map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push( + ` : as texel coord mip level[${mipLevel}]: (${t[0]}, ${t[1]}), face: ${faceNdx}(${kFaceNames[faceNdx]})` + ); + } + } else { + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? '2d', + texture.descriptor.size, + mipLevel + ); + const t = call.coords!.map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push(` : as texel coord @ mip level[${mipLevel}]: (${t.join(', ')})`); + } + } + if (builtinNeedsDerivatives(call.builtin)) { + const ddx = derivativeForCall(texture, call, true); + const ddy = derivativeForCall(texture, call, false); + const mipLevel = computeMipLevelFromGradients(ddx, ddy, size); + const biasStr = call.bias === undefined ? '' : ' (without bias)'; + errs.push(`implicit derivative based mip level: ${fix5(mipLevel)}${biasStr}`); + if (call.bias) { + const clampedBias = clamp(call.bias ?? 0, { min: -16.0, max: 15.99 }); + errs.push(`\ + clamped bias: ${fix5(clampedBias)} + mip level with bias: ${fix5(mipLevel + clampedBias)}`); + } + } else if (call.ddx) { + const mipLevel = computeMipLevelFromGradientsForCall(call, size); + errs.push(`gradient based mip level: ${mipLevel}`); + } + errs.push(`\ + got: ${fix5v(rgbaToArray(gotRGBA))} + expected: ${fix5v(rgbaToArray(expectRGBA))} + max diff: ${maxFractionalDiff} + abs diffs: ${fix5v(diffs.map(({ absDiff }) => absDiff))} + rel diffs: ${diffs.map(({ relDiff }) => `${(relDiff * 100).toFixed(2)}%`).join(', ')} + ulp diffs: ${diffs.map(({ ulpDiff }) => ulpDiff).join(', ')} `); - if (sampler) { + + if (sampler) { + if (t.rec.debugging) { + // For compares, we can't use the builtin (textureXXXCompareXXX) because it only + // returns 0 or 1 or the average of 0 and 1 for multiple samples. And, for example, + // if the comparison is `always` then every sample returns 1. 
So we need to use the + // corresponding sample function to get the actual values from the textures + // + // textureSampleCompare -> textureSample + // textureSampleCompareLevel -> textureSampleLevel + // textureGatherCompare -> textureGather + if (isBuiltinComparison(call.builtin)) { + if (!haveComparisonCheckInfo) { + // Convert the comparison calls to their corresponding non-comparison call + const debugCalls = calls.map(call => { + const debugCall = { ...call }; + debugCall.depthRef = undefined; + switch (call.builtin) { + case 'textureGatherCompare': + debugCall.builtin = 'textureGather'; + break; + case 'textureSampleCompare': + debugCall.builtin = 'textureSample'; + break; + case 'textureSampleCompareLevel': + debugCall.builtin = 'textureSampleLevel'; + debugCall.levelType = 'f'; + debugCall.mipLevel = 0; + break; + default: + unreachable(); + } + return debugCall; + }); + + // Convert the comparison sampler to a non-comparison sampler + const debugSampler = { ...sampler }; + delete debugSampler.compare; + + // Make a runner for these changed calls. + const debugRunner = createTextureCallsRunner( + t, + { + format, + dimension: texture.descriptor.dimension ?? '2d', + sampleCount: texture.descriptor.sampleCount ?? 1, + depthOrArrayLayers: size.depthOrArrayLayers, + }, + texture.viewDescriptor, + textureType, + debugSampler, + debugCalls, + stage + ); + checkInfo = { + runner: debugRunner, + sampler: debugSampler, + calls: debugCalls, + }; + haveComparisonCheckInfo = true; + } + } + + if (!gpuTexels && gpuTexture) { + // Read the texture back if we haven't yet. We'll use this + // to get values for each sample point. + gpuTexels = await readTextureToTexelViews( + t, + gpuTexture, + texture.descriptor, + getTexelViewFormatForTextureFormat(gpuTexture.format) + ); + } + + const callForSamplePoints = checkInfo.calls[callIdx]; + const expectedSamplePoints = [ 'expected:', - ...(await identifySamplePoints(texture, (texels: TexelView[]) => { - return Promise.resolve( - softwareTextureReadLevel( - t, - call, - { - texels, - descriptor: texture.descriptor, - viewDescriptor: texture.viewDescriptor, - }, - sampler, - call.mipLevel ?? 0 - ) - ); - })), + ...(await identifySamplePoints( + texture, + sampler, + callForSamplePoints, + call, + texture.texels, + (texels: TexelView[]) => { + return Promise.resolve( + softwareTextureRead( + t, + stage, + callForSamplePoints, + { + texels, + descriptor: texture.descriptor, + viewDescriptor: texture.viewDescriptor, + }, + checkInfo.sampler + ) + ); + } + )), ]; const gotSamplePoints = [ 'got:', - ...(await identifySamplePoints(texture, async (texels: TexelView[]) => { - const gpuTexture = createTextureFromTexelViews(t, texels, texture.descriptor); - const result = ( - await doTextureCalls(t, gpuTexture, texture.viewDescriptor, textureType, sampler, [ - call, - ]) - )[0]; - gpuTexture.destroy(); - return result; - })), + ...(await identifySamplePoints( + texture, + sampler, + callForSamplePoints, + call, + gpuTexels, + async (texels: TexelView[]) => { + const gpuTexture = createTextureFromTexelViewsLocal(t, texels, texture.descriptor); + const result = (await checkInfo.runner.run(gpuTexture))[callIdx]; + gpuTexture.destroy(); + return result; + } + )), ]; errs.push(' sample points:'); errs.push(layoutTwoColumns(expectedSamplePoints, gotSamplePoints).join('\n')); errs.push('', ''); } - } - } - } - - return errs.length > 0 ? 
new Error(errs.join('\n')) : undefined; -} - -/** - * "Renders a quad" to a TexelView with the given parameters, - * sampling from the given Texture. - */ -export function softwareRasterize( - t: GPUTest, - texture: Texture, - sampler: GPUSamplerDescriptor, - targetSize: [number, number], - options: TextureTestOptions -) { - const [width, height] = targetSize; - const { ddx = 1, ddy = 1, uvwStart = [0, 0] } = options; - const format = 'rgba32float'; - - const textureSize = reifyExtent3D(texture.descriptor.size); - - // MAINTENANCE_TODO: Consider passing these in as a similar computation - // happens in putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer. - // The issue is there, the calculation is "what do we need to multiply the unitQuad - // by to get the derivatives we want". The calculation here is "what coordinate - // will we get for a given frag coordinate". It turns out to be the same calculation - // but needs rephrasing them so they are more obviously the same would help - // consolidate them into one calculation. - const screenSpaceUMult = (ddx * width) / textureSize.width; - const screenSpaceVMult = (ddy * height) / textureSize.height; - - const rep = kTexelRepresentationInfo[format]; - - const expData = new Float32Array(width * height * 4); - for (let y = 0; y < height; ++y) { - const fragY = height - y - 1 + 0.5; - for (let x = 0; x < width; ++x) { - const fragX = x + 0.5; - // This code calculates the same value that will be passed to - // `textureSample` in the fragment shader for a given frag coord (see the - // WGSL code which uses the same formula, but using interpolation). That - // shader renders a clip space quad and includes a inter-stage "uv" - // coordinates that start with a unit quad (0,0) to (1,1) and is - // multiplied by ddx,ddy and as added in uStart and vStart - // - // uv = unitQuad * vec2(ddx, ddy) + vec2(vStart, uStart); - // - // softwareTextureRead simulates a single call to `textureSample` so - // here we're computing the `uv` value that will be passed for a - // particular fragment coordinate. fragX / width, fragY / height provides - // the unitQuad value. - // - // ddx and ddy in this case are the derivative values we want to test. We - // pass those into the softwareTextureRead as they would normally be - // derived from the change in coord. - const coords = [ - (fragX / width) * screenSpaceUMult + uvwStart[0], - (fragY / height) * screenSpaceVMult + uvwStart[1], - ] as T; - const call: TextureCall = { - builtin: 'textureSample', - coordType: 'f', - coords, - ddx: [ddx / textureSize.width, 0] as T, - ddy: [0, ddy / textureSize.height] as T, - offset: options.offset as T, - }; - const sample = softwareTextureRead(t, call, texture, sampler); - const rgba = { R: 0, G: 0, B: 0, A: 1, ...sample }; - const asRgba32Float = new Float32Array(rep.pack(rgba)); - expData.set(asRgba32Float, (y * width + x) * 4); - } - } - - return TexelView.fromTextureDataByReference(format, new Uint8Array(expData.buffer), { - bytesPerRow: width * 4 * 4, - rowsPerImage: height, - subrectOrigin: [0, 0, 0], - subrectSize: targetSize, - }); -} - -/** - * Render textured quad to an rgba32float texture. 
- */ -export function drawTexture( - t: GPUTest & TextureTestMixinType, - texture: GPUTexture, - samplerDesc: GPUSamplerDescriptor, - options: TextureTestOptions -) { - const device = t.device; - const { ddx = 1, ddy = 1, uvwStart = [0, 0, 0], offset } = options; - - const format = 'rgba32float'; - const renderTarget = t.createTextureTracked({ - format, - size: [32, 32], - usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.RENDER_ATTACHMENT, - }); - - // Compute the amount we need to multiply the unitQuad by get the - // derivatives we want. - const uMult = (ddx * renderTarget.width) / texture.width; - const vMult = (ddy * renderTarget.height) / texture.height; - - const offsetWGSL = offset ? `, vec2i(${offset[0]},${offset[1]})` : ''; - - const code = ` -struct InOut { - @builtin(position) pos: vec4f, - @location(0) uv: vec2f, -}; - -@vertex fn vs(@builtin(vertex_index) vertex_index : u32) -> InOut { - let positions = array( - vec2f(-1, 1), vec2f( 1, 1), - vec2f(-1, -1), vec2f( 1, -1), - ); - let pos = positions[vertex_index]; - return InOut( - vec4f(pos, 0, 1), - (pos * 0.5 + 0.5) * vec2f(${uMult}, ${vMult}) + vec2f(${uvwStart[0]}, ${uvwStart[1]}), - ); -} - -@group(0) @binding(0) var T : texture_2d; -@group(0) @binding(1) var S : sampler; - -@fragment fn fs(v: InOut) -> @location(0) vec4f { - return textureSample(T, S, v.uv${offsetWGSL}); -} -`; - - const shaderModule = device.createShaderModule({ code }); - const pipeline = device.createRenderPipeline({ - layout: 'auto', - vertex: { module: shaderModule }, - fragment: { - module: shaderModule, - targets: [{ format }], - }, - primitive: { topology: 'triangle-strip' }, - }); - - const sampler = device.createSampler(samplerDesc); - - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: texture.createView() }, - { binding: 1, resource: sampler }, - ], - }); - - const encoder = device.createCommandEncoder(); + // this is not an else because it's common to comment out the previous `if` for running on a CQ. + if (!t.rec.debugging) { + errs.push('### turn on debugging to see sample points ###'); + } + } // if (sampler) - const renderPass = encoder.beginRenderPass({ - colorAttachments: [{ view: renderTarget.createView(), loadOp: 'clear', storeOp: 'store' }], - }); + // Don't report the other errors. There 50 sample points per subcase and + // 50-100 subcases so the log would get enormous if all 50 fail. One + // report per subcase is enough. + break; + } // if (bad) + } // for cellNdx - renderPass.setPipeline(pipeline); - renderPass.setBindGroup(0, bindGroup); - renderPass.draw(4); - renderPass.end(); - device.queue.submit([encoder.finish()]); + results.runner.destroy(); + checkInfo.runner.destroy(); - return renderTarget; + return errs.length > 0 ? new Error(errs.join('\n')) : undefined; } function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { @@ -1399,11 +2432,11 @@ function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { // tolerances if possible. 
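// (Illustrative note on how these values are used: in the result check above,
//  a component only fails when it is off by more than 3 ULPs AND by more than
//  this maxFractionalDiff. For example, for an 8unorm format the value below
//  is 7 / 255 ≈ 0.027, so a result within roughly 2.7% of the expectation is
//  accepted even if it is many ULPs away from it.)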
if (format.includes('depth')) { - return 3 / 65536; + return 3 / 100; } else if (format.includes('8unorm')) { return 7 / 255; } else if (format.includes('2unorm')) { - return 9 / 512; + return 13 / 512; } else if (format.includes('unorm')) { return 7 / 255; } else if (format.includes('8snorm')) { @@ -1420,49 +2453,6 @@ function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { } } -export function checkTextureMatchesExpectedTexelView( - t: GPUTest & TextureTestMixinType, - format: GPUTextureFormat, - actualTexture: GPUTexture, - expectedTexelView: TexelView -) { - const maxFractionalDiff = getMaxFractionalDiffForTextureFormat(format); - t.expectTexelViewComparisonIsOkInTexture( - { texture: actualTexture }, - expectedTexelView, - [actualTexture.width, actualTexture.height], - { maxFractionalDiff } - ); -} - -/** - * Puts data in a texture. Renders a quad to a rgba32float. Then "software renders" - * to a TexelView the expected result and compares the rendered texture to the - * expected TexelView. - */ -export async function putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer< - T extends Dimensionality, ->( - t: GPUTest & TextureTestMixinType, - descriptor: GPUTextureDescriptor, - viewDescriptor: GPUTextureViewDescriptor, - samplerDesc: GPUSamplerDescriptor, - options: TextureTestOptions -) { - const { texture, texels } = await createTextureWithRandomDataAndGetTexels(t, descriptor); - - const actualTexture = drawTexture(t, texture, samplerDesc, options); - const expectedTexelView = softwareRasterize( - t, - { descriptor, texels, viewDescriptor }, - samplerDesc, - [actualTexture.width, actualTexture.height], - options - ); - - checkTextureMatchesExpectedTexelView(t, texture.format, actualTexture, expectedTexelView); -} - const sumOfCharCodesOfString = (s: unknown) => String(s) .split('') @@ -1533,7 +2523,7 @@ function getBlockFiller(format: GPUTextureFormat) { /** * Fills a texture with random data. */ -export function fillTextureWithRandomData(device: GPUDevice, texture: GPUTexture) { +function fillTextureWithRandomData(device: GPUDevice, texture: GPUTexture) { assert(!isCompressedFloatTextureFormat(texture.format)); const info = kTextureFormatInfo[texture.format]; const hashBase = @@ -1572,7 +2562,7 @@ const s_readTextureToRGBA32DeviceToPipeline = new WeakMap< // MAINTENANCE_TODO: remove cast once textureBindingViewDimension is added to IDL function getEffectiveViewDimension( t: GPUTest, - descriptor: GPUTextureDescriptor + descriptor: Omit ): GPUTextureViewDimension { const { textureBindingViewDimension } = descriptor as unknown as { textureBindingViewDimension?: GPUTextureViewDimension; @@ -1588,7 +2578,7 @@ function getEffectiveViewDimension( export async function readTextureToTexelViews( t: GPUTest, texture: GPUTexture, - descriptor: GPUTextureDescriptor, + descriptor: Omit, format: EncodableTextureFormat ) { const device = t.device; @@ -1763,6 +2753,19 @@ export async function readTextureToTexelViews( return texelViews; } +function createTextureFromTexelViewsLocal( + t: GPUTest, + texelViews: TexelView[], + desc: Omit +): GPUTexture { + const modifiedDescriptor = { ...desc }; + // If it's a depth or stencil texture we need to render to it to fill it with data. 
+ if (isDepthOrStencilTextureFormat(texelViews[0].format)) { + modifiedDescriptor.usage = desc.usage | GPUTextureUsage.RENDER_ATTACHMENT; + } + return createTextureFromTexelViews(t, texelViews, modifiedDescriptor); +} + /** * Fills a texture with random data and returns that data as * an array of TexelView. @@ -1776,9 +2779,11 @@ export async function readTextureToTexelViews( */ export async function createTextureWithRandomDataAndGetTexels( t: GPUTest, - descriptor: GPUTextureDescriptor + descriptor: GPUTextureDescriptor, + options?: RandomTextureOptions ) { if (isCompressedTextureFormat(descriptor.format)) { + assert(!options, 'options not supported for compressed textures'); const texture = t.createTextureTracked(descriptor); fillTextureWithRandomData(t.device, texture); @@ -1790,15 +2795,15 @@ export async function createTextureWithRandomDataAndGetTexels( ); return { texture, texels }; } else { - const texels = createRandomTexelViewMipmap(descriptor); - const texture = createTextureFromTexelViews(t, texels, descriptor); + const texels = createRandomTexelViewMipmap(descriptor, options); + const texture = createTextureFromTexelViewsLocal(t, texels, descriptor); return { texture, texels }; } } function valueIfAllComponentsAreEqual( c: PerTexelComponent, - componentOrder: TexelComponent[] + componentOrder: readonly TexelComponent[] ) { const s = new Set(componentOrder.map(component => c[component]!)); return s.size === 1 ? s.values().next().value : undefined; @@ -1873,32 +2878,36 @@ const kFaceNames = ['+x', '-x', '+y', '-y', '+z', '-z'] as const; * Example: * * 0 1 2 3 4 5 6 7 - * ┌───┬───┬───┬───┬───┬───┬───┬───┐ - * 0 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 1 │ │ │ │ │ │ │ │ a │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 2 │ │ │ │ │ │ │ │ b │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 3 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 4 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 5 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 6 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 7 │ │ │ │ │ │ │ │ │ - * └───┴───┴───┴───┴───┴───┴───┴───┘ + * +---+---+---+---+---+---+---+---+ + * 0 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 1 | | | | | | | | a | + * +---+---+---+---+---+---+---+---+ + * 2 | | | | | | | | b | + * +---+---+---+---+---+---+---+---+ + * 3 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 4 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 5 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 6 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 7 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ * a: at: [7, 1], weights: [R: 0.75000] * b: at: [7, 2], weights: [R: 0.25000] */ -async function identifySamplePoints( +async function identifySamplePoints( texture: Texture, + sampler: GPUSamplerDescriptor, + callForSamples: TextureCall, + originalCall: TextureCall, + texels: TexelView[] | undefined, run: (texels: TexelView[]) => Promise> ) { const info = texture.descriptor; - const isCube = texture.viewDescriptor.dimension === 'cube'; + const isCube = isCubeViewDimension(texture.viewDescriptor); const mipLevelCount = texture.descriptor.mipLevelCount ?? 1; const mipLevelSize = range(mipLevelCount, mipLevel => virtualMipSize(texture.descriptor.dimension ?? 
'2d', texture.descriptor.size, mipLevel) @@ -1914,6 +2923,27 @@ async function identifySamplePoints( })(); const numTexels = numTexelsPerLevel.reduce((sum, v) => sum + v); + const getMipLevelFromTexelId = (texelId: number) => { + for (let mipLevel = mipLevelCount - 1; mipLevel > 0; --mipLevel) { + if (texelId - numTexelsOfPrecedingLevels[mipLevel] >= 0) { + return mipLevel; + } + } + return 0; + }; + + const getTexelCoordFromTexelId = (texelId: number) => { + const mipLevel = getMipLevelFromTexelId(texelId); + const size = mipLevelSize[mipLevel]; + const texelsPerSlice = size[0] * size[1]; + const id = texelId - numTexelsOfPrecedingLevels[mipLevel]; + const layer = Math.floor(id / texelsPerSlice); + const xyId = id - layer * texelsPerSlice; + const y = (xyId / size[0]) | 0; + const x = xyId % size[0]; + return { x, y, z: layer, mipLevel, xyId }; + }; + // This isn't perfect. We already know there was an error. We're just // generating info so it seems okay it's not perfect. This format will // be used to generate weights by drawing with a texture of this format @@ -1934,6 +2964,11 @@ async function identifySamplePoints( ) as EncodableTextureFormat; const rep = kTexelRepresentationInfo[format]; + const components = isBuiltinGather(callForSamples.builtin) ? kRGBAComponents : rep.componentOrder; + const convertResultAsAppropriate = isBuiltinGather(callForSamples.builtin) + ? (v: T) => v + : convertResultFormatToTexelViewFormat; + // Identify all the texels that are sampled, and their weights. const sampledTexelWeights = new Map>(); const unclassifiedStack = [new Set(range(numTexels, v => v))]; @@ -1951,8 +2986,8 @@ async function identifySamplePoints( unclassifiedStack.push(setB); } - // See if any of the texels in setA were sampled. - const results = convertResultFormatToTexelViewFormat( + // See if any of the texels in setA were sampled.0 + const results = convertResultAsAppropriate( await run( range(mipLevelCount, mipLevel => TexelView.fromTexelsAsColors( @@ -1978,7 +3013,7 @@ async function identifySamplePoints( ), format ); - if (rep.componentOrder.some(c => results[c] !== 0)) { + if (components.some(c => results[c] !== 0)) { // One or more texels of setA were sampled. if (setA.size === 1) { // We identified a specific texel was sampled. @@ -1991,40 +3026,26 @@ async function identifySamplePoints( } } - const getMipLevelFromTexelId = (texelId: number) => { - for (let mipLevel = mipLevelCount - 1; mipLevel > 0; --mipLevel) { - if (texelId - numTexelsOfPrecedingLevels[mipLevel] >= 0) { - return mipLevel; - } - } - return 0; - }; - // separate the sampledTexelWeights by mipLevel, then by layer, within a layer the texelId only includes x and y const levels: Map>[][] = []; for (const [texelId, weight] of sampledTexelWeights.entries()) { - const mipLevel = getMipLevelFromTexelId(texelId); + const { xyId, z, mipLevel } = getTexelCoordFromTexelId(texelId); const level = levels[mipLevel] ?? []; levels[mipLevel] = level; - const size = mipLevelSize[mipLevel]; - const texelsPerSlice = size[0] * size[1]; - const id = texelId - numTexelsOfPrecedingLevels[mipLevel]; - const layer = Math.floor(id / texelsPerSlice); - const layerEntries = level[layer] ?? new Map(); - level[layer] = layerEntries; - const xyId = id - layer * texelsPerSlice; + const layerEntries = level[z] ?? 
new Map(); + level[z] = layerEntries; layerEntries.set(xyId, weight); } - // ┌───┬───┬───┬───┐ - // │ a │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ b │ - // └───┴───┴───┴───┘ + // +---+---+---+---+ + // | a | | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + // | | | | b | + // +---+---+---+---+ const lines: string[] = []; const letter = (idx: number) => String.fromCodePoint(idx < 30 ? 97 + idx : idx + 9600 - 30); // 97: 'a' let idCount = 0; @@ -2040,13 +3061,20 @@ async function identifySamplePoints( for (let layer = 0; layer < depthOrArrayLayers; ++layer) { const layerEntries = level[layer]; - if (!layerEntries) { - continue; - } const orderedTexelIndices: number[] = []; lines.push(''); - lines.push(`layer: ${layer}${isCube ? ` (${kFaceNames[layer]})` : ''}`); + const unSampled = layerEntries ? '' : 'un-sampled'; + if (isCube) { + const face = kFaceNames[layer % 6]; + lines.push(`layer: ${layer}, cube-layer: ${(layer / 6) | 0} (${face}) ${unSampled}`); + } else { + lines.push(`layer: ${layer} ${unSampled}`); + } + + if (!layerEntries) { + continue; + } { let line = ' '; @@ -2056,57 +3084,92 @@ async function identifySamplePoints( lines.push(line); } { - let line = ' ┌'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┐' : '───┬'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } for (let y = 0; y < height; y++) { { - let line = `${y.toString().padEnd(2)}│`; + let line = `${y.toString().padEnd(2)}|`; for (let x = 0; x < width; x++) { const texelIdx = x + y * texelsPerRow; const weight = layerEntries.get(texelIdx); if (weight !== undefined) { - line += ` ${letter(idCount + orderedTexelIndices.length)} │`; + line += ` ${letter(idCount + orderedTexelIndices.length)} |`; orderedTexelIndices.push(texelIdx); } else { - line += ' │'; + line += ' |'; } } lines.push(line); } if (y < height - 1) { - let line = ' ├'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┤' : '───┼'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } } { - let line = ' └'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┘' : '───┴'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } const pad2 = (n: number) => n.toString().padStart(2); const fix5 = (n: number) => n.toFixed(5); + const formatTexel = (texel: PerTexelComponent | undefined) => + texel + ? Object.entries(texel) + .map(([k, v]) => `${k}: ${fix5(v)}`) + .join(', ') + : '*texel values unavailable*'; + + const colorLines: string[] = []; + const compareLines: string[] = []; + let levelWeight = 0; orderedTexelIndices.forEach((texelIdx, i) => { const weights = layerEntries.get(texelIdx)!; const y = Math.floor(texelIdx / texelsPerRow); const x = texelIdx % texelsPerRow; - const singleWeight = valueIfAllComponentsAreEqual(weights, rep.componentOrder); + const singleWeight = valueIfAllComponentsAreEqual(weights, components)!; + levelWeight += singleWeight; const w = singleWeight !== undefined ? 
`weight: ${fix5(singleWeight)}` - : `weights: [${rep.componentOrder.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; + : `weights: [${components.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; const coord = `${pad2(x)}, ${pad2(y)}, ${pad2(layer)}`; - lines.push(`${letter(idCount + i)}: mip(${mipLevel}) at: [${coord}], ${w}`); + const texel = + texels && + convertToTexelViewFormat( + texels[mipLevel].color({ x, y, z: layer }), + texture.descriptor.format + ); + + const texelStr = formatTexel(texel); + const id = letter(idCount + i); + lines.push(`${id}: mip(${mipLevel}) at: [${coord}], ${w}`); + colorLines.push(`${id}: value: ${texelStr}`); + if (isBuiltinComparison(originalCall.builtin)) { + assert(!!texel); + const compareTexel = applyCompare(originalCall, sampler, [TexelComponent.Depth], texel); + compareLines.push( + `${id}: compare(${sampler.compare}) result with depthRef(${fix5( + originalCall.depthRef! + )}): ${fix5(compareTexel.Depth!)}` + ); + } }); + lines.push(...colorLines); + lines.push(...compareLines); + if (!isNaN(levelWeight)) { + lines.push(`level weight: ${fix5(levelWeight)}`); + } idCount += orderedTexelIndices.length; } } @@ -2131,9 +3194,13 @@ function layoutTwoColumns(columnA: string[], columnB: string[]) { */ export function getDepthOrArrayLayersForViewDimension(viewDimension?: GPUTextureViewDimension) { switch (viewDimension) { + case '1d': + return 1; case undefined: case '2d': return 1; + case '2d-array': + return 4; case '3d': return 8; case 'cube': @@ -2161,9 +3228,12 @@ export function chooseTextureSize({ }) { const { blockWidth, blockHeight } = kTextureFormatInfo[format]; const width = align(Math.max(minSize, blockWidth * minBlocks), blockWidth); - const height = align(Math.max(minSize, blockHeight * minBlocks), blockHeight); + const height = + viewDimension === '1d' ? 1 : align(Math.max(minSize, blockHeight * minBlocks), blockHeight); if (viewDimension === 'cube' || viewDimension === 'cube-array') { - const size = lcm(width, height); + const blockLCM = lcm(blockWidth, blockHeight); + const largest = Math.max(width, height); + const size = align(largest, blockLCM); return [size, size, viewDimension === 'cube-array' ? 24 : 6]; } const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); @@ -2177,11 +3247,17 @@ export const kCubeSamplePointMethods = ['cube-edges', 'texel-centre', 'spiral'] export type CubeSamplePointMethods = (typeof kSamplePointMethods)[number]; type TextureBuiltinInputArgs = { + textureBuiltin?: TextureBuiltin; descriptor: GPUTextureDescriptor; sampler?: GPUSamplerDescriptor; + derivatives?: boolean; mipLevel?: RangeDef; sampleIndex?: RangeDef; arrayIndex?: RangeDef; + grad?: boolean; + bias?: boolean; + component?: boolean; + depthRef?: boolean; offset?: boolean; hashInputs: (number | string | boolean)[]; }; @@ -2201,7 +3277,19 @@ function generateTextureBuiltinInputsImpl( radius?: number; loops?: number; }) -): { coords: T; mipLevel: number; sampleIndex?: number; arrayIndex?: number; offset?: T }[] { +): { + coords: T; + derivativeMult?: T; + ddx?: T; + ddy?: T; + mipLevel: number; + sampleIndex?: number; + arrayIndex?: number; + bias?: number; + offset?: T; + component?: number; + depthRef?: number; +}[] { const { method, descriptor } = args; const dimension = descriptor.dimension ?? '2d'; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2233,15 +3321,27 @@ function generateTextureBuiltinInputsImpl( const _hashInputs = args.hashInputs.map(v => typeof v === 'string' ? 
sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v ); + + // returns a number between [0 and N) + const makeRandValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { + const range = num; + const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range; + return type === 'f32' ? number : Math.floor(number); + }; + + // for signed and float values returns [-1 to num] + // for unsigned values returns [0 to num] const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; - const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + + // Generates the same values per coord instead of using all the extra `_hashInputs`. + const makeIntHashValueRepeatable = (min: number, max: number, ...hashInputs: number[]) => { const range = max - min; - return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + return min + Math.floor((hashU32(...hashInputs) / 0x1_0000_0000) * range); }; // Samplers across devices use different methods to interpolate. @@ -2253,7 +3353,77 @@ function generateTextureBuiltinInputsImpl( // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + + // When filtering is nearest then we want to avoid edges of texels + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // Above, coordinate U could sample either A or B + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | C | | | | | + // +---+---+---+---+---+---+---+---+ + // + // For textureGather we want to avoid texel centers + // as for coordinate U could either gather A,B or B,C. + + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0; + + // textureGather issues for 2d/3d textures + // + // If addressModeU is repeat, then on an 8x1 texture, u = 0.01 or u = 0.99 + // would gather these texels + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // If addressModeU is clamp-to-edge or mirror-repeat, + // then on an 8x1 texture, u = 0.01 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // and 0.99 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // This means we have to if addressMode is not `repeat`, we + // need to avoid the edge of the texture. + // + // Note: we don't have these specific issues with cube maps + // as they ignore addressMode + const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m; + const addressMode: GPUAddressMode[] = + args.textureBuiltin === 'textureSampleBaseClampToEdge' + ? ['clamp-to-edge', 'clamp-to-edge', 'clamp-to-edge'] + : [ + args.sampler?.addressModeU ?? 'clamp-to-edge', + args.sampler?.addressModeV ?? 'clamp-to-edge', + args.sampler?.addressModeW ?? 
'clamp-to-edge', + ]; + const avoidTextureEdge = (axis: number, textureDimensionUnits: number, v: number) => { + assert(isBuiltinGather(args.textureBuiltin)); + if (addressMode[axis] === 'repeat') { + return v; + } + const inside = euclideanModulo(v, textureDimensionUnits); + const outside = v - inside; + return outside + clamp(inside, { min: 1, max: textureDimensionUnits - 1 }); + }; + + const numComponents = isDepthOrStencilTextureFormat(descriptor.format) ? 1 : 4; return coords.map((c, i) => { const mipLevel = args.mipLevel ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest') @@ -2265,27 +3435,115 @@ function generateTextureBuiltinInputsImpl( const coords = c.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isTexelEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isTexelEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + const v3 = isBuiltinGather(args.textureBuiltin) ? avoidTextureEdge(i, q[i], v2) : v2; // Convert back to texture coords - return v2 / q[i]; + return v3 / q[i]; }) as T; + const makeGradient = (hashInput: number): T => { + return coords.map((_, i) => { + // a value between -4 and 4 integer then add +/- 0.25 + // We want to be able to choose levels but we want to avoid the area where the + // gpu might choose 2 different levels than the software renderer. + const intPart = makeRangeValue({ num: 8, type: 'u32' }, i, hashInput) - 4; + const fractPart = makeRangeValue({ num: 0, type: 'f32' }, i, hashInput + 1) * 0.25; + assert(fractPart >= -0.25 && fractPart <= 0.25); + return intPart + fractPart; + }) as T; + }; + + // choose a derivative value that will select a mipLevel. + const makeDerivativeMult = (coords: T, mipLevel: number): T => { + // Make an identity vec (all 1s). + const mult = new Array(coords.length).fill(0); + // choose one axis to set + const ndx = makeRangeValue({ num: coords.length - 1, type: 'u32' }, i, 8); + assert(ndx < coords.length); + mult[ndx] = Math.pow(2, mipLevel); + return mult as T; + }; + + // Choose a mip level. If mipmapFilter is 'nearest' then avoid centers of levels + // else avoid edges. + const chooseMipLevel = () => { + const innerLevelR = makeRandValue({ num: 9, type: 'u32' }, i, 11); + const innerLevel = + args?.sampler?.mipmapFilter === 'linear' + ? innerLevelR + 1 + : innerLevelR < 5 + ? innerLevelR + : innerLevelR + 1; + const outerLevel = makeRangeValue({ num: mipLevelCount - 1, type: 'i32' }, i, 11); + return outerLevel + innerLevel / 10; + }; + + // for textureSample, choose a derivative value that will select a mipLevel near + // the range of mip levels. + const makeDerivativeMultForTextureSample = (coords: T): T => { + const mipLevel = chooseMipLevel(); + return makeDerivativeMult(coords, mipLevel); + }; + + // for textureSampleBias we choose a mipLevel we want to sample, then a bias between -17 and 17. + // and then a derivative that, given the chosen bias will arrive at the chosen mipLevel. + // The GPU is supposed to clamp between -16.0 and 15.99. 
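// (Worked example of the scheme described above; the numbers are illustrative,
//  not values the harness necessarily generates.)
//
// Say we want the call to land on mip level 2 and the hash picks bias = -3.5:
//
//   clampedBias               = clamp(-3.5, -16, 15.99)  = -3.5
//   derivativeBasedMipLevel   = 2 - (-3.5)               = 5.5
//   derivativeMult (one axis) = 2 ** 5.5                 ≈ 45.25
//
// The implicit derivative then selects level 5.5, the GPU adds the clamped
// bias of -3.5 back in, and the sample lands on the intended level 2.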
+ const makeBiasAndDerivativeMult = (coords: T): [number, T] => { + const mipLevel = chooseMipLevel(); + const bias = makeRangeValue({ num: 34, type: 'f32' }, i, 9) - 17; + const clampedBias = clamp(bias, { min: -16, max: 15.99 }); + const derivativeBasedMipLevel = mipLevel - clampedBias; + const derivativeMult = makeDerivativeMult(coords, derivativeBasedMipLevel); + return [bias, derivativeMult]; + }; + + // If bias is set this is textureSampleBias. If bias is not set but derivatives + // is then this is one of the other functions that needs implicit derivatives. + const [bias, derivativeMult] = args.bias + ? makeBiasAndDerivativeMult(coords) + : args.derivatives + ? [undefined, makeDerivativeMultForTextureSample(coords)] + : []; + return { coords, + derivativeMult, mipLevel, sampleIndex: args.sampleIndex ? makeRangeValue(args.sampleIndex, i, 1) : undefined, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + // use 0.0, 0.5, or 1.0 for depthRef. We can't test for equality except for values 0 and 1 + // The texture will be filled with random values unless our comparison is 'equal' or 'not-equal' + // in which case the texture will be filled with only 0, 0.6, 1. Choosing 0.0, 0.5, 1.0 here + // means we can test 'equal' and 'not-equal'. For other comparisons, the fact that the texture's + // contents is random seems enough to test all the comparison modes. + depthRef: args.depthRef ? makeRandValue({ num: 3, type: 'u32' }, i, 5) / 2 : undefined, + ddx: args.grad ? makeGradient(7) : undefined, + ddy: args.grad ? makeGradient(8) : undefined, + bias, offset: args.offset - ? (coords.map((_, j) => makeIntHashValue(-8, 8, i, 3 + j)) as T) + ? (coords.map((_, j) => makeIntHashValueRepeatable(-8, 8, i, 3 + j)) as T) : undefined, + component: args.component ? makeIntHashValueRepeatable(0, numComponents, i, 4) : undefined, }; }); } +/** + * When mipmapFilter === 'nearest' we need to stay away from 0.5 + * because the GPU could decide to choose one mip or the other. + * + * Some example transition values, the value at which the GPU chooses + * mip level 1 over mip level 0: + * + * M1 Mac: 0.515381 + * Intel Mac: 0.49999 + * AMD Mac: 0.5 + */ const kMipEpsilon = 0.02; -function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUFilterMode) { +function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUMipmapFilterMode) { if (mipmapFilter === 'linear') { return mipLevel; } @@ -2360,7 +3618,7 @@ function normalize(v: vec3): vec3 { /** * Converts a cube map coordinate to a uv coordinate (0 to 1) and layer (0.5/6.0 to 5.5/6.0). */ -export function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { +function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { let uvw; let layer; // normalize the coord. @@ -2389,141 +3647,41 @@ export function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { /** * Convert a 3d texcoord into a cube map coordinate. */ -export function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) { +function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) { const [u, v, faceLayer] = uvLayer; return normalize(transformMat3([u, v, 1], kFaceUVMatrices[Math.min(5, faceLayer * 6) | 0])); } /** + * Wrap a texel based face coord across cube faces + * * We have a face texture in texels coord where U/V choose a texel and W chooses the face. * If U/V are outside the size of the texture then, when normalized and converted * to a cube map coordinate, they'll end up pointing to a different face. 
* * addressMode is effectively ignored for cube * - * +-----------+ - * |0->u | - * |↓ | - * |v +y | - * | (2) | - * | | - * +-----------+-----------+-----------+-----------+ - * |0->u |0->u |0->u |0->u | - * |↓ |↓ |↓ |↓ | - * |v -x |v +z |v +x |v -z | - * | (1) | (4) | (0) | (5) | - * | | | | | - * +-----------+-----------+-----------+-----------+ - * |0->u | - * |↓ | - * |v -y | - * | (3) | - * | | - * +-----------+ + * By converting from a texel based coord to a normalized coord and then to a cube map coord, + * if the texel was outside of the face, the cube map coord will end up pointing to a different + * face. We then convert back cube coord -> normalized face coord -> texel based coord */ -const kFaceConversions = { - u: (textureSize: number, faceCoord: vec3) => faceCoord[0], - v: (textureSize: number, faceCoord: vec3) => faceCoord[1], - 'u+t': (textureSize: number, faceCoord: vec3) => faceCoord[0] + textureSize, - 'u-t': (textureSize: number, faceCoord: vec3) => faceCoord[0] - textureSize, - 'v+t': (textureSize: number, faceCoord: vec3) => faceCoord[1] + textureSize, - 'v-t': (textureSize: number, faceCoord: vec3) => faceCoord[1] - textureSize, - 't-v': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1], - '1+u': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[0], - '1+v': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[1], - '-v-1': (textureSize: number, faceCoord: vec3) => -faceCoord[1] - 1, - 't-u-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[0] - 1, - 't-v-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1] - 1, - '2t-u-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[0] - 1, - '2t-v-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[1] - 1, -} as const; -const kFaceConversionEnums = keysOf(kFaceConversions); -type FaceCoordConversion = (typeof kFaceConversionEnums)[number]; - -// For Each face -// face to go if u < 0 -// face to go if u >= textureSize -// face to go if v < 0 -// face to go if v >= textureSize -const kFaceToFaceRemap: { to: number; u: FaceCoordConversion; v: FaceCoordConversion }[][] = [ - // 0 - [ - /* -u */ { to: 4, u: 'u+t', v: 'v' }, - /* +u */ { to: 5, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'v+t', v: 't-u-1' }, - /* +v */ { to: 3, u: '2t-v-1', v: 'u' }, - ], - // 1 - [ - /* -u */ { to: 5, u: 'u+t', v: 'v' }, - /* +u */ { to: 4, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: '-v-1', v: 'u' }, // -1->0, -2->1 -3->2 - /* +v */ { to: 3, u: 't-v', v: 't-u-1' }, - ], - // 2 - [ - /* -u */ { to: 1, u: 'v', v: '1+u' }, - /* +u */ { to: 0, u: 't-v-1', v: 'u-t' }, - /* -v */ { to: 5, u: 't-u-1', v: '-v-1' }, - /* +v */ { to: 4, u: 'u', v: 'v-t' }, - ], - // 3 - [ - /* -u */ { to: 1, u: 't-v-1', v: 'u+t' }, - /* +u */ { to: 0, u: 'v', v: '2t-u-1' }, - /* -v */ { to: 4, u: 'u', v: 'v+t' }, - /* +v */ { to: 5, u: 't-u-1', v: '2t-v-1' }, - ], - // 4 - [ - /* -u */ { to: 1, u: 'u+t', v: 'v' }, - /* +u */ { to: 0, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'u', v: 'v+t' }, - /* +v */ { to: 3, u: 'u', v: 'v-t' }, - ], - // 5 - [ - /* -u */ { to: 0, u: 'u+t', v: 'v' }, - /* +u */ { to: 1, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 't-u-1', v: '1+v' }, - /* +v */ { to: 3, u: 't-u-1', v: '2t-v-1' }, - ], -]; - -function getFaceWrapIndex(textureSize: number, faceCoord: vec3) { - if (faceCoord[0] < 0) { - return 0; - } - if (faceCoord[0] >= textureSize) { - return 1; - } - if (faceCoord[1] < 0) { - return 2; - } - if (faceCoord[1] >= textureSize) { - 
return 3; - } - return -1; -} - -function applyFaceWrap(textureSize: number, faceCoord: vec3): vec3 { - const ndx = getFaceWrapIndex(textureSize, faceCoord); - if (ndx < 0) { - return faceCoord; - } - const { to, u, v } = kFaceToFaceRemap[faceCoord[2]][ndx]; - return [ - kFaceConversions[u](textureSize, faceCoord), - kFaceConversions[v](textureSize, faceCoord), - to, +function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { + // convert texel based face coord to normalized 2d-array coord + const nc0: vec3 = [ + (faceCoord[0] + 0.5) / textureSize, + (faceCoord[1] + 0.5) / textureSize, + (faceCoord[2] + 0.5) / 6, + ]; + const cc = convertNormalized3DTexCoordToCubeCoord(nc0); + const nc1 = convertCubeCoordToNormalized3DTextureCoord(cc); + // convert normalized 2d-array coord back texel based face coord + const fc = [ + Math.floor(nc1[0] * textureSize), + Math.floor(nc1[1] * textureSize), + Math.floor(nc1[2] * 6), ]; -} -function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { - // If we're off both edges we need to wrap twice, once for each edge. - const faceCoord1 = applyFaceWrap(textureSize, faceCoord); - const faceCoord2 = applyFaceWrap(textureSize, faceCoord1); - return faceCoord2; + return fc; } function applyAddressModesToCoords( @@ -2567,9 +3725,15 @@ export function generateSamplePointsCube( }) ): { coords: vec3; + derivativeMult?: vec3; + ddx?: vec3; + ddy?: vec3; mipLevel: number; arrayIndex?: number; + bias?: number; offset?: undefined; + component?: number; + depthRef?: number; }[] { const { method, descriptor } = args; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2610,20 +3774,38 @@ export function generateSamplePointsCube( /* prettier-ignore */ coords.push( // between edges - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], - - [-1.01, 0, -1.02], - [ 1.01, 0, -1.02], - [-1.01, 0, 1.02], - [ 1.01, 0, 1.02], - - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], + // +x + [ 1 , -1.01, 0 ], // wrap -y + [ 1 , +1.01, 0 ], // wrap +y + [ 1 , 0 , -1.01 ], // wrap -z + [ 1 , 0 , +1.01 ], // wrap +z + // -x + [ -1 , -1.01, 0 ], // wrap -y + [ -1 , +1.01, 0 ], // wrap +y + [ -1 , 0 , -1.01 ], // wrap -z + [ -1 , 0 , +1.01 ], // wrap +z + + // +y + [ -1.01, 1 , 0 ], // wrap -x + [ +1.01, 1 , 0 ], // wrap +x + [ 0 , 1 , -1.01 ], // wrap -z + [ 0 , 1 , +1.01 ], // wrap +z + // -y + [ -1.01, -1 , 0 ], // wrap -x + [ +1.01, -1 , 0 ], // wrap +x + [ 0 , -1 , -1.01 ], // wrap -z + [ 0 , -1 , +1.01 ], // wrap +z + + // +z + [ -1.01, 0 , 1 ], // wrap -x + [ +1.01, 0 , 1 ], // wrap +x + [ 0 , -1.01, 1 ], // wrap -y + [ 0 , +1.01, 1 ], // wrap +y + // -z + [ -1.01, 0 , -1 ], // wrap -x + [ +1.01, 0 , -1 ], // wrap +x + [ 0 , -1.01, -1 ], // wrap -y + [ 0 , +1.01, -1 ], // wrap +y // corners (see comment "Issues with corners of cubemaps") // for why these are commented out. @@ -2643,13 +3825,28 @@ export function generateSamplePointsCube( const _hashInputs = args.hashInputs.map(v => typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v ); + + // returns a number between [0 and N) + const makeRandValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { + const range = num; + const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range; + return type === 'f32' ? 
number : Math.floor(number); + }; + + // for signed and float values returns [-1 to num] + // for unsigned values returns [0 to num] const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; + const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + const range = max - min; + return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + }; + // Samplers across devices use different methods to interpolate. // Quantizing the texture coordinates seems to hit coords that produce // comparable results to our computed results. @@ -2658,12 +3855,102 @@ export function generateSamplePointsCube( // Win 11, NVidia 2070 Super: 16 // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 + // + // Note: When doing `textureGather...` we can't use texel centers + // because which 4 pixels will be gathered jumps if we're slightly under + // or slightly over the center + // + // Similarly, if we're using 'nearest' filtering then we don't want texel + // edges for the same reason. + // + // Also note that for textureGather. The way it works for cube maps is to + // first convert from cube map coordinate to a 2D texture coordinate and + // a face. Then, choose 4 texels just like normal 2D texture coordinates. + // If one of the 4 texels is outside the current face, wrap it to the correct + // face. + // + // An issue this brings up though. Imagine a 2D texture with addressMode = 'repeat' + // + // 2d texture (same texture repeated to show 'repeat') + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ a│ │c │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ b│ │d │ │ │ + // └───┴───┴───┘ └───┴───┴───┘ + // + // Assume the texture coordinate is at the bottom right corner of a. + // Then textureGather will grab c, d, b, a (no idea why that order). + // but think of it as top-right, bottom-right, bottom-left, top-left. + // Similarly, if the texture coordinate is at the top left of d it + // will select the same 4 texels. + // + // But, in the case of a cubemap, each face is in different direction + // relative to the face next to it. + // + // +-----------+ + // |0->u | + // |↓ | + // |v +y | + // | (2) | + // | | + // +-----------+-----------+-----------+-----------+ + // |0->u |0->u |0->u |0->u | + // |↓ |↓ |↓ |↓ | + // |v -x |v +z |v +x |v -z | + // | (1) | (4) | (0) | (5) | + // | | | | | + // +-----------+-----------+-----------+-----------+ + // |0->u | + // |↓ | + // |v -y | + // | (3) | + // | | + // +-----------+ + // + // As an example, imagine going from the +y to the +x face. + // See diagram above, the right edge of the +y face wraps + // to the top edge of the +x face. + // + // +---+---+ + // | a|c | + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ b│d │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ a│ c | │ │ │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ b│ d | │ │ │ │ + // └───┴───┴───┘---+ └───┴───┴───┘ + // +y face +x face + // + // If the texture coordinate is in the bottom right corner of a, + // the rectangle of texels we read are a,b,c,d and, if we the + // texture coordinate is in the top left corner of d we also + // read a,b,c,d according to the 2 diagrams above. 
+ // + // But, notice that when reading from the POV of +y vs +x, + // which actual a,b,c,d texels are different. + // + // From the POV of face +x: a,b are in face +x and c,d are in face +y + // From the POV of face +y: a,c are in face +x and b,d are in face +y + // + // This is all the long way of saying that if we're on the edge of a cube + // face we could get drastically different results because the orientation + // of the rectangle of the 4 texels we use, rotates. So, we need to avoid + // any values too close to the edge just in case our math is different than + // the GPU's. + // const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0; return coords.map((c, i) => { - const mipLevel = args.mipLevel ? makeRangeValue(args.mipLevel, i) : 0; + const mipLevel = args.mipLevel + ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest') + : 0; const clampedMipLevel = clamp(mipLevel, { min: 0, max: mipLevelCount - 1 }); - const mipSize = virtualMipSize('2d', size, clampedMipLevel); + const mipSize = virtualMipSize('2d', size, Math.ceil(clampedMipLevel)); const q = [ mipSize[0] * kSubdivisionsPerTexel, mipSize[0] * kSubdivisionsPerTexel, @@ -2683,17 +3970,92 @@ export function generateSamplePointsCube( const quantizedUVW = uvw.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; - // Convert back to texture coords - return v2 / q[i]; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + // Convert back to texture coords slightly off + return (v2 + 1 / 16) / q[i]; }) as vec3; + + const quantize = (v: number, units: number) => Math.floor(v * units) * units; + + const makeGradient = (hashInput: number): T => { + return coords.map((_, i) => + // a value between -4 and 4, quantized to 1/3rd. + quantize(makeRangeValue({ num: 8, type: 'f32' }, i, hashInput) - 4, 1 / 3) + ) as T; + }; + const coords = convertNormalized3DTexCoordToCubeCoord(quantizedUVW); + + // choose a derivative value that will select a mipLevel. + const makeDerivativeMult = (coords: vec3, mipLevel: number): vec3 => { + // Make an identity vec (all 1s). + const mult = new Array(coords.length).fill(0); + // choose one axis to set + const ndx = makeRangeValue({ num: coords.length - 1, type: 'u32' }, i, 8); + assert(ndx < coords.length); + mult[ndx] = Math.pow(2, mipLevel); + return mult as vec3; + }; + + // Choose a mip level. If mipmapFilter is 'nearest' then avoid centers of levels + // else avoid edges. + const chooseMipLevel = () => { + const innerLevelR = makeRandValue({ num: 9, type: 'u32' }, i, 11); + const innerLevel = + args?.sampler?.mipmapFilter === 'linear' + ? innerLevelR + 1 + : innerLevelR < 4 + ? 
innerLevelR + : innerLevelR + 1; + const outerLevel = makeRangeValue({ num: mipLevelCount - 1, type: 'i32' }, i, 11); + return outerLevel + innerLevel / 10; + }; + + // for textureSample, choose a derivative value that will select a mipLevel near + // the range of mip levels. + const makeDerivativeMultForTextureSample = (coords: vec3): vec3 => { + const mipLevel = chooseMipLevel(); + return makeDerivativeMult(coords, mipLevel); + }; + + // for textureSampleBias we choose a mipLevel we want to sample, then a bias between -17 and 17. + // and then a derivative that, given the chosen bias will arrive at the chosen mipLevel. + // The GPU is supposed to clamp between -16.0 and 15.99. + const makeBiasAndDerivativeMult = (coords: vec3): [number, vec3] => { + const mipLevel = chooseMipLevel(); + const bias = makeRangeValue({ num: 34, type: 'f32' }, i, 9) - 17; + const clampedBias = clamp(bias, { min: -16, max: 15.99 }); + const derivativeBasedMipLevel = mipLevel - clampedBias; + const derivativeMult = makeDerivativeMult(coords, derivativeBasedMipLevel); + return [bias, derivativeMult]; + }; + + // If bias is set this is textureSampleBias. If bias is not set but derivatives + // is then this is one of the other functions that needs implicit derivatives. + const [bias, derivativeMult] = args.bias + ? makeBiasAndDerivativeMult(coords) + : args.derivatives + ? [undefined, makeDerivativeMultForTextureSample(coords)] + : []; + return { coords, + derivativeMult, + ddx: args.grad ? makeGradient(7) : undefined, + ddy: args.grad ? makeGradient(8) : undefined, mipLevel, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + bias, + // use 0.0, 0.5, or 1.0 for depthRef. We can't test for equality except for values 0 and 1 + // The texture will be filled with random values unless our comparison is 'equal' or 'not-equal' + // in which case the texture will be filled with only 0, 0.6, 1. Choosing 0.0, 0.5, 1.0 here + // means we can test 'equal' and 'not-equal'. For other comparisons, the fact that the texture's + // contents is random seems enough to test all the comparison modes. + depthRef: args.depthRef ? makeRandValue({ num: 3, type: 'u32' }, i, 5) / 2 : undefined, + component: args.component ? 
makeIntHashValue(0, 4, i, 4) : undefined, }; }); } @@ -2714,7 +4076,9 @@ function wgslTypeFor(data: number | Dimensionality, type: 'f' | 'i' | 'u'): stri return `${type}32`; } -function wgslExpr(data: number | vec1 | vec2 | vec3 | vec4): string { +function wgslExpr( + data: number | Readonly | Readonly | Readonly | Readonly +): string { if (Array.isArray(data)) { switch (data.length) { case 1: @@ -2751,8 +4115,8 @@ function binKey(call: TextureCall): string { for (const name of kTextureCallArgNames) { const value = call[name]; if (value !== undefined) { - if (name === 'offset') { - // offset must be a constant expression + if (name === 'offset' || name === 'component') { + // offset and component must be constant expressions keys.push(`${name}: ${wgslExpr(value)}`); } else { keys.push(`${name}: ${wgslTypeFor(value, call.coordType)}`); @@ -2763,12 +4127,19 @@ function binKey(call: TextureCall): string { } function buildBinnedCalls(calls: TextureCall[]) { - const args: string[] = ['T']; // All texture builtins take the texture as the first argument + const args: string[] = []; const fields: string[] = []; const data: number[] = []; - const prototype = calls[0]; - if (prototype.builtin.startsWith('textureSample')) { + + if (isBuiltinGather(prototype.builtin) && prototype['componentType']) { + args.push(`/* component */ ${wgslExpr(prototype['component']!)}`); + } + + // All texture builtins take a Texture + args.push('T'); + + if (builtinNeedsSampler(prototype.builtin)) { // textureSample*() builtins take a sampler as the second argument args.push('S'); } @@ -2778,6 +4149,8 @@ function buildBinnedCalls(calls: TextureCall[]) { if (value !== undefined) { if (name === 'offset') { args.push(`/* offset */ ${wgslExpr(value)}`); + } else if (name === 'component') { + // was handled above } else { const type = name === 'mipLevel' @@ -2786,8 +4159,18 @@ function buildBinnedCalls(calls: TextureCall[]) { ? prototype.arrayIndexType! : name === 'sampleIndex' ? prototype.sampleIndexType! + : name === 'bias' || name === 'depthRef' || name === 'ddx' || name === 'ddy' + ? 'f' : prototype.coordType; - args.push(`args.${name}`); + if (name !== 'derivativeMult') { + args.push( + `args.${name}${ + name === 'coords' && builtinNeedsDerivatives(prototype.builtin) + ? 
' + derivativeBase * args.derivativeMult' + : '' + }` + ); + } fields.push(`@align(16) ${name} : ${wgslTypeFor(value, type)}`); } } @@ -2800,7 +4183,7 @@ function buildBinnedCalls(calls: TextureCall[]) { (prototype[name] === undefined) === (value === undefined), 'texture calls are not binned correctly' ); - if (value !== undefined && name !== 'offset') { + if (value !== undefined && name !== 'offset' && name !== 'component') { const type = getCallArgType(call, name); const bitcastToU32 = kBitCastFunctions[type]; if (value instanceof Array) { @@ -2839,22 +4222,39 @@ function binCalls(calls: TextureCall[]): number[][] return bins; } -export function describeTextureCall(call: TextureCall): string { - const args: string[] = ['texture: T']; - if (call.builtin.startsWith('textureSample')) { +function describeTextureCall(call: TextureCall): string { + const args: string[] = []; + if (isBuiltinGather(call.builtin) && call.componentType) { + args.push(`component: ${wgslExprFor(call.component!, call.componentType)}`); + } + args.push('texture: T'); + if (builtinNeedsSampler(call.builtin)) { args.push('sampler: S'); } for (const name of kTextureCallArgNames) { const value = call[name]; - if (value !== undefined) { + if (value !== undefined && name !== 'component') { if (name === 'coords') { + const derivativeWGSL = builtinNeedsDerivatives(call.builtin) + ? ` + derivativeBase * derivativeMult(${ + call.derivativeMult ? wgslExprFor(call.derivativeMult, call.coordType) : '1' + })` + : ''; + args.push(`${name}: ${wgslExprFor(value, call.coordType)}${derivativeWGSL}`); + } else if (name === 'derivativeMult') { + // skip this - it's covered in 'coords' + } else if (name === 'ddx' || name === 'ddy') { args.push(`${name}: ${wgslExprFor(value, call.coordType)}`); } else if (name === 'mipLevel') { args.push(`${name}: ${wgslExprFor(value, call.levelType!)}`); } else if (name === 'arrayIndex') { args.push(`${name}: ${wgslExprFor(value, call.arrayIndexType!)}`); + } else if (name === 'bias') { + args.push(`${name}: ${wgslExprFor(value, 'f')}`); } else if (name === 'sampleIndex') { args.push(`${name}: ${wgslExprFor(value, call.sampleIndexType!)}`); + } else if (name === 'depthRef') { + args.push(`${name}: ${wgslExprFor(value, 'f')}`); } else { args.push(`${name}: ${wgslExpr(value)}`); } @@ -2863,27 +4263,95 @@ export function describeTextureCall(call: TextureCall< return `${call.builtin}(${args.join(', ')})`; } -const s_deviceToPipelines = new WeakMap>(); +const s_deviceToPipelines = new WeakMap< + GPUDevice, + Map +>(); /** * Given a list of "calls", each one of which has a texture coordinate, - * generates a fragment shader that uses the fragment position as an index - * (position.y * 256 + position.x) That index is then used to look up a - * coordinate from a storage buffer which is used to call the WGSL texture - * function to read/sample the texture, and then write to an rgba32float - * texture. We then read the rgba32float texture for the per "call" results. + * generates a fragment shader that uses the instance_index as an index. That + * index is then used to look up a coordinate from a storage buffer which is + * used to call the WGSL texture function to read/sample the texture, and then + * write to a storage buffer. We then read the storage buffer for the per "call" + * results. + * + * We use a 1x1 target and use instance drawing, once instance per call. + * This allows use to more easily adjust derivatives per call. 
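+ *
+ * As a sketch (this mirrors the render-pass loop in `run` further below; it is
+ * illustrative only, not additional behavior):
+ *
+ *   for (let i = 0; i < calls.length; ++i) {
+ *     pass.setViewport(i, 0, 1, 1, 0, 1); // one 1x1 viewport (pixel) per call
+ *     pass.draw(3, 1, 0, i);              // firstInstance = i, so instance_index selects the call
+ *   }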
+ * + * An issue we ran into before this "one draw call per instance" change; + * Before we had a single draw call and wrote the result of one call per + * pixel rendered. + * + * Imagine we have code like this: + * + * ``` + * @group(0) @binding(0) var T: texture_2d; + * @group(0) @binding(1) var S: sampler; + * @group(0) @binding(2) var coords: array; + * @fragment fn fs(@builtin(position) pos: vec4f) -> vec4f { + * let ndx = u32(pos.x) * u32(pos.y) * targetWidth; + * return textureSample(T, S, coords[ndx].xy); + * } + * ``` + * + * T points to 8x8 pixel texture with 3 mip levels + * S is 'nearest' + * coords: is a storage buffer, 16 bytes long [0,0,0,0], one vec4f. + * our render target is 1x1 pixels + * + * Looking above it appears `ndx` will only ever be 0 but that's + * not what happens. Instead, the GPU will run the fragment shader for + * a 2x2 area. It does this to compute derivatives by running the code + * above and looking at what values it gets passed as coords to + * textureSample. When it does this it ends up with + * + * ndx = 0 for invocation 0 + * ndx = 1 for invocation 1 + * ndx = 0 + 1 * targetWidth for invocation 2 + * ndx = 1 + 1 * targetWidth for invocation 3 + * + * In 3 of those cases `ndx` is out of bounds with respect to `coords`. + * Out of bounds access is indeterminate. That means the derivatives are + * indeterminate so what lod it tries to read is indeterminate. + * + * By using instance_index for ndx we avoid this issue. ndx is the same + * on all 4 executions. * * Calls are "binned" by call parameters. Each bin has its own structure and * field in the storage buffer. This allows the calls to be non-homogenous and * each have their own data type for coordinates. + * + * Note: this function returns: + * + * 'results': an array of results, one for each call. + * + * 'run': a function that accepts a texture and runs the same class pipeline with + * that texture as input, returning an array of results. This can be used by + * identifySamplePoints to query the mix-weights used. We do this so we're + * using the same shader that generated the original results when querying + * the weights. + * + * 'destroy': a function that cleans up the buffers used by `run`. 
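+ *
+ * Example usage (a sketch of how `doTextureCalls` below wires this up; `texInfo`
+ * here is a stand-in for the {format, dimension, sampleCount, depthOrArrayLayers}
+ * argument):
+ *
+ *   const runner = createTextureCallsRunner(t, texInfo, viewDescriptor, textureType, sampler, calls, stage);
+ *   const results = await runner.run(gpuTexture);
+ *   // ...optionally call runner.run() again with another texture to query mix-weights...
+ *   runner.destroy();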
*/ -export async function doTextureCalls( +function createTextureCallsRunner( t: GPUTest, - gpuTexture: GPUTexture | GPUExternalTexture, + { + format, + dimension, + sampleCount, + depthOrArrayLayers, + }: { + format: GPUTextureFormat; + dimension: GPUTextureDimension; + sampleCount: number; + depthOrArrayLayers: number; + }, viewDescriptor: GPUTextureViewDescriptor, textureType: string, sampler: GPUSamplerDescriptor | undefined, - calls: TextureCall[] + calls: TextureCall[], + stage: ShaderStage ) { let structs = ''; let body = ''; @@ -2894,15 +4362,15 @@ export async function doTextureCalls( binned.forEach((binCalls, binIdx) => { const b = buildBinnedCalls(binCalls.map(callIdx => calls[callIdx])); structs += `struct Args${binIdx} { - ${b.fields.join(', \n')} + ${b.fields.join(',\n ')} } `; dataFields += ` args${binIdx} : array, `; body += ` { - let is_active = (frag_idx >= ${callCount}) & (frag_idx < ${callCount + binCalls.length}); - let args = data.args${binIdx}[frag_idx - ${callCount}]; + let is_active = (idx >= ${callCount}) & (idx < ${callCount + binCalls.length}); + let args = data.args${binIdx}[idx - ${callCount}]; let call = ${b.expr}; result = select(result, call, is_active); } @@ -2913,25 +4381,93 @@ export async function doTextureCalls( const dataBuffer = t.createBufferTracked({ size: data.length * 4, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM, }); t.device.queue.writeBuffer(dataBuffer, 0, new Uint32Array(data)); - const { resultType, resultFormat, componentType } = - gpuTexture instanceof GPUExternalTexture - ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : textureType.includes('depth') - ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : getTextureFormatTypeInfo(gpuTexture.format); + const builtin = calls[0].builtin; + const isCompare = isBuiltinComparison(builtin); + + const { resultType, resultFormat, componentType } = isBuiltinGather(builtin) + ? getTextureFormatTypeInfo(format) + : textureType === 'texture_external' + ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : textureType.includes('depth') + ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : getTextureFormatTypeInfo(format); const returnType = `vec4<${componentType}>`; - const rtWidth = 256; + const samplerType = isCompare ? 'sampler_comparison' : 'sampler'; + const renderTarget = t.createTextureTracked({ format: resultFormat, - size: { width: rtWidth, height: Math.ceil(calls.length / rtWidth) }, + size: [calls.length, 1], usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.RENDER_ATTACHMENT, }); + // derivativeBase is a number that starts at (0, 0, 0) and advances by 1 in x, y + // for each fragment shader iteration in texel space. It is then converted to normalized + // texture space by dividing by the textureDimensions. + // Since it's moving by 1 texel unit we can multiply it to get any specific lod value we want. + // Because it starts at (0, 0, 0) it will not affect our texture coordinate. + const derivativeBaseWGSL = ` + let derivativeBase = ${ + isCubeViewDimension(viewDescriptor) + ? '(v.pos.xyx - 0.5 - vec3f(f32(v.ndx), 0, f32(v.ndx))) / vec3f(vec2f(textureDimensions(T)), 1.0)' + : dimension === '1d' + ? 'f32(v.pos.x - 0.5 - f32(v.ndx)) / f32(textureDimensions(T))' + : dimension === '3d' + ? 
'vec3f(v.pos.xy - 0.5 - vec2f(f32(v.ndx), 0), 0) / vec3f(textureDimensions(T))' + : '(v.pos.xy - 0.5 - vec2f(f32(v.ndx), 0)) / vec2f(textureDimensions(T))' + };`; + const derivativeType = + isCubeViewDimension(viewDescriptor) || dimension === '3d' + ? 'vec3f' + : dimension === '1d' + ? 'f32' + : 'vec2f'; + + const stageWGSL = + stage === 'vertex' + ? ` +// --------------------------- vertex stage shaders -------------------------------- +@vertex fn vsVertex( + @builtin(vertex_index) vertex_index : u32, + @builtin(instance_index) instance_index : u32) -> VOut { + let positions = array(vec2f(-1, 3), vec2f(3, -1), vec2f(-1, -1)); + return VOut(vec4f(positions[vertex_index], 0, 1), + instance_index, + getResult(instance_index, ${derivativeType}(0))); +} + +@fragment fn fsVertex(v: VOut) -> @location(0) ${returnType} { + return v.result; +} +` + : stage === 'fragment' + ? ` +// --------------------------- fragment stage shaders -------------------------------- +@vertex fn vsFragment( + @builtin(vertex_index) vertex_index : u32, + @builtin(instance_index) instance_index : u32) -> VOut { + let positions = array(vec2f(-1, 3), vec2f(3, -1), vec2f(-1, -1)); + return VOut(vec4f(positions[vertex_index], 0, 1), instance_index, ${returnType}(0)); +} + +@fragment fn fsFragment(v: VOut) -> @location(0) ${returnType} { + ${derivativeBaseWGSL} + return getResult(v.ndx, derivativeBase); +} +` + : ` +// --------------------------- compute stage shaders -------------------------------- +@group(1) @binding(0) var results: array<${returnType}>; + +@compute @workgroup_size(1) fn csCompute(@builtin(global_invocation_id) id: vec3u) { + results[id.x] = getResult(id.x, ${derivativeType}(0)); +} +`; + const code = ` ${structs} @@ -2939,120 +4475,301 @@ struct Data { ${dataFields} } -@vertex -fn vs_main(@builtin(vertex_index) vertex_index : u32) -> @builtin(position) vec4f { - let positions = array( - vec4f(-1, 1, 0, 1), vec4f( 1, 1, 0, 1), - vec4f(-1, -1, 0, 1), vec4f( 1, -1, 0, 1), - ); - return positions[vertex_index]; -} +struct VOut { + @builtin(position) pos: vec4f, + @location(0) @interpolate(flat, either) ndx: u32, + @location(1) @interpolate(flat, either) result: ${returnType}, +}; @group(0) @binding(0) var T : ${textureType}; -${sampler ? '@group(0) @binding(1) var S : sampler' : ''}; -@group(0) @binding(2) var data : Data; +${sampler ? `@group(0) @binding(1) var S : ${samplerType}` : ''}; +@group(0) @binding(2) var data : Data; -@fragment -fn fs_main(@builtin(position) frag_pos : vec4f) -> @location(0) ${returnType} { - let frag_idx = u32(frag_pos.x) + u32(frag_pos.y) * ${renderTarget.width}; +fn getResult(idx: u32, derivativeBase: ${derivativeType}) -> ${returnType} { var result : ${resultType}; ${body} return ${returnType}(result); } + +${stageWGSL} `; - const pipelines = s_deviceToPipelines.get(t.device) ?? new Map(); + const pipelines = + s_deviceToPipelines.get(t.device) ?? new Map(); s_deviceToPipelines.set(t.device, pipelines); - const id = `${renderTarget.format}:${code}`; + // unfilterable-float textures can only be used with manually created bindGroupLayouts + // since the default 'auto' layout requires filterable textures/samplers. + // So, if we don't need filtering, don't request a filtering sampler. If we require + // filtering then check if the format is 32float format and if float32-filterable + // is enabled. + const info = kTextureFormatInfo[format ?? 
'rgba8unorm']; + const isFiltering = + !!sampler && + (sampler.minFilter === 'linear' || + sampler.magFilter === 'linear' || + sampler.mipmapFilter === 'linear'); + let sampleType: GPUTextureSampleType = textureType.startsWith('texture_depth') + ? 'depth' + : isDepthTextureFormat(format) + ? 'unfilterable-float' + : isStencilTextureFormat(format) + ? 'uint' + : info.color?.type ?? 'float'; + if (isFiltering && sampleType === 'unfilterable-float') { + assert(is32Float(format)); + assert(t.device.features.has('float32-filterable')); + sampleType = 'float'; + } + if (sampleCount > 1 && sampleType === 'float') { + sampleType = 'unfilterable-float'; + } + + const visibility = + stage === 'compute' + ? GPUShaderStage.COMPUTE + : stage === 'fragment' + ? GPUShaderStage.FRAGMENT + : GPUShaderStage.VERTEX; + + const entries: GPUBindGroupLayoutEntry[] = [ + { + binding: 2, + visibility, + buffer: { + type: 'uniform', + }, + }, + ]; + + const viewDimension = effectiveViewDimensionForDimension( + viewDescriptor.dimension, + dimension, + depthOrArrayLayers + ); + + if (textureType.includes('storage')) { + entries.push({ + binding: 0, + visibility, + storageTexture: { + access: 'read-only', + viewDimension, + format, + }, + }); + } else if (textureType === 'texture_external') { + entries.push({ + binding: 0, + visibility, + externalTexture: {}, + }); + } else { + entries.push({ + binding: 0, + visibility, + texture: { + sampleType, + viewDimension, + multisampled: sampleCount > 1, + }, + }); + } + + if (sampler) { + entries.push({ + binding: 1, + visibility, + sampler: { + type: isCompare ? 'comparison' : isFiltering ? 'filtering' : 'non-filtering', + }, + }); + } + + const id = `${resultType}:${stage}:${JSON.stringify(entries)}:${code}`; let pipeline = pipelines.get(id); if (!pipeline) { - const shaderModule = t.device.createShaderModule({ code }); + const module = t.device.createShaderModule({ code }); + const bindGroupLayout0 = t.device.createBindGroupLayout({ entries }); + const bindGroupLayouts = [bindGroupLayout0]; + + if (stage === 'compute') { + const bindGroupLayout1 = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.COMPUTE, + buffer: { + type: 'storage', + }, + }, + ], + }); + bindGroupLayouts.push(bindGroupLayout1); + } - pipeline = await t.device.createRenderPipelineAsync({ - layout: 'auto', - vertex: { module: shaderModule }, - fragment: { - module: shaderModule, - targets: [{ format: renderTarget.format }], - }, - primitive: { topology: 'triangle-strip' }, + const layout = t.device.createPipelineLayout({ + bindGroupLayouts, }); + switch (stage) { + case 'compute': + pipeline = t.device.createComputePipeline({ + layout, + compute: { module }, + }); + break; + case 'fragment': + case 'vertex': + pipeline = t.device.createRenderPipeline({ + layout, + vertex: { module }, + fragment: { + module, + targets: [{ format: renderTarget.format }], + }, + }); + break; + } pipelines.set(id, pipeline); } const gpuSampler = sampler ? t.device.createSampler(sampler) : undefined; - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: - gpuTexture instanceof GPUExternalTexture - ? gpuTexture - : gpuTexture.createView(viewDescriptor), - }, - ...(sampler ? [{ binding: 1, resource: gpuSampler! 
}] : []), - { binding: 2, resource: { buffer: dataBuffer } }, - ], - }); + const run = async (gpuTexture: GPUTexture | GPUExternalTexture) => { + const resultBuffer = t.createBufferTracked({ + size: align(calls.length * 16, 256), + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); - const bytesPerRow = align(16 * renderTarget.width, 256); - const resultBuffer = t.createBufferTracked({ - size: renderTarget.height * bytesPerRow, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); - const encoder = t.device.createCommandEncoder(); + const bindGroup0 = t.device.createBindGroup({ + layout: pipeline!.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: + gpuTexture instanceof GPUExternalTexture + ? gpuTexture + : gpuTexture.createView(viewDescriptor), + }, + ...(sampler ? [{ binding: 1, resource: gpuSampler! }] : []), + { binding: 2, resource: { buffer: dataBuffer } }, + ], + }); - const renderPass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: renderTarget.createView(), - loadOp: 'clear', - storeOp: 'store', - }, - ], - }); + let storageBuffer: GPUBuffer | undefined; + const encoder = t.device.createCommandEncoder(); - renderPass.setPipeline(pipeline); - renderPass.setBindGroup(0, bindGroup); - renderPass.draw(4); - renderPass.end(); - encoder.copyTextureToBuffer( - { texture: renderTarget }, - { buffer: resultBuffer, bytesPerRow }, - { width: renderTarget.width, height: renderTarget.height } - ); - t.device.queue.submit([encoder.finish()]); + if (stage === 'compute') { + storageBuffer = t.createBufferTracked({ + size: resultBuffer.size, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); - await resultBuffer.mapAsync(GPUMapMode.READ); + const bindGroup1 = t.device.createBindGroup({ + layout: pipeline!.getBindGroupLayout(1), + entries: [{ binding: 0, resource: { buffer: storageBuffer } }], + }); - const view = TexelView.fromTextureDataByReference( - renderTarget.format as EncodableTextureFormat, - new Uint8Array(resultBuffer.getMappedRange()), - { - bytesPerRow, - rowsPerImage: renderTarget.height, - subrectOrigin: [0, 0, 0], - subrectSize: [renderTarget.width, renderTarget.height], + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline! as GPUComputePipeline); + pass.setBindGroup(0, bindGroup0); + pass.setBindGroup(1, bindGroup1); + pass.dispatchWorkgroups(calls.length); + pass.end(); + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, storageBuffer.size); + } else { + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + + pass.setPipeline(pipeline! 
as GPURenderPipeline); + pass.setBindGroup(0, bindGroup0); + for (let i = 0; i < calls.length; ++i) { + pass.setViewport(i, 0, 1, 1, 0, 1); + pass.draw(3, 1, 0, i); + } + pass.end(); + encoder.copyTextureToBuffer( + { texture: renderTarget }, + { + buffer: resultBuffer, + bytesPerRow: resultBuffer.size, + }, + [renderTarget.width, 1] + ); } - ); + t.device.queue.submit([encoder.finish()]); + + await resultBuffer.mapAsync(GPUMapMode.READ); + + const view = TexelView.fromTextureDataByReference( + resultFormat, + new Uint8Array(resultBuffer.getMappedRange()), + { + bytesPerRow: calls.length * 16, + rowsPerImage: 1, + subrectOrigin: [0, 0, 0], + subrectSize: [calls.length, 1], + } + ); - let outIdx = 0; - const out = new Array>(calls.length); - for (const bin of binned) { - for (const callIdx of bin) { - const x = outIdx % rtWidth; - const y = Math.floor(outIdx / rtWidth); - out[callIdx] = view.color({ x, y, z: 0 }); - outIdx++; + let outIdx = 0; + const out = new Array>(calls.length); + for (const bin of binned) { + for (const callIdx of bin) { + const x = outIdx; + out[callIdx] = view.color({ x, y: 0, z: 0 }); + outIdx++; + } } - } - renderTarget.destroy(); - resultBuffer.destroy(); + storageBuffer?.destroy(); + resultBuffer.destroy(); - return out; + return out; + }; + + return { + run, + destroy() { + dataBuffer.destroy(); + renderTarget.destroy(); + }, + }; +} + +export async function doTextureCalls( + t: GPUTest, + gpuTexture: GPUTexture | GPUExternalTexture, + viewDescriptor: GPUTextureViewDescriptor, + textureType: string, + sampler: GPUSamplerDescriptor | undefined, + calls: TextureCall[], + shortShaderStage: ShortShaderStage +) { + const stage = kShortShaderStageToShaderStage[shortShaderStage]; + const runner = createTextureCallsRunner( + t, + gpuTexture instanceof GPUExternalTexture + ? { format: 'rgba8unorm', dimension: '2d', depthOrArrayLayers: 1, sampleCount: 1 } + : gpuTexture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const results = await runner.run(gpuTexture); + + return { + runner, + results, + }; } diff --git a/src/webgpu/shader/execution/padding.spec.ts b/src/webgpu/shader/execution/padding.spec.ts index 3a3671bcc3ff..c9e230013590 100644 --- a/src/webgpu/shader/execution/padding.spec.ts +++ b/src/webgpu/shader/execution/padding.spec.ts @@ -263,6 +263,87 @@ g.test('array_of_vec3') ); }); +g.test('array_of_vec3h') + .desc( + `Test that padding bytes in between array elements are preserved when f16 elements are used. + + This test defines creates a read-write storage buffer with type array. The shader + assigns the whole variable at once, and we then test that data in the padding bytes was + preserved. + ` + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('shader-f16'); + }) + .fn(t => { + const wgsl = ` + enable f16; + @group(0) @binding(0) var buffer : array, 4>; + + @compute @workgroup_size(1) + fn main() { + buffer = array, 4>( + vec3(1h), + vec3(2h), + vec3(3h), + vec3(4h), + ); + } + `; + runShaderTest( + t, + wgsl, + new Uint32Array([ + // buffer[0] + 0x3c003c00, 0xdead3c00, + // buffer[1] + 0x40004000, 0xdead4000, + // buffer[2] + 0x42004200, 0xdead4200, + // buffer[2] + 0x44004400, 0xdead4400, + ]) + ); + }); + +g.test('array_of_vec3h,elementwise') + .desc( + `Test that padding bytes in between array elements are preserved when f16 elements are used. + + This test defines creates a read-write storage buffer with type array. 
The shader + assigns one element per thread, and we then test that data in the padding bytes was + preserved. + ` + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('shader-f16'); + }) + .fn(t => { + const wgsl = ` + enable f16; + @group(0) @binding(0) var buffer : array>; + + @compute @workgroup_size(4) + fn main(@builtin(local_invocation_index) lid : u32) { + buffer[lid] = vec3h(f16(lid + 1)); + } + `; + runShaderTest( + t, + wgsl, + new Uint32Array([ + // buffer[0] + 0x3c003c00, 0xdead3c00, + // buffer[1] + 0x40004000, 0xdead4000, + // buffer[2] + 0x42004200, 0xdead4200, + // buffer[2] + 0x44004400, 0xdead4400, + ]) + ); + }); + g.test('array_of_struct') .desc( `Test that padding bytes in between array elements are preserved. diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index ffd58976fc88..7a6aa8901e28 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -20,14 +20,17 @@ is evaluated per-fragment or per-sample. With @interpolate(, sample) or usage of import { makeTestGroup } from '../../../../common/framework/test_group.js'; import { ErrorWithExtra, assert, range, unreachable } from '../../../../common/util/util.js'; import { InterpolationSampling, InterpolationType } from '../../../constants.js'; -import { GPUTest } from '../../../gpu_test.js'; +import { kTextureFormatInfo } from '../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; import { getProvokingVertexForFlatInterpolationEitherSampling } from '../../../inter_stage.js'; import { getMultisampleFragmentOffsets } from '../../../multisample_info.js'; -import { dotProduct, subtractVectors } from '../../../util/math.js'; +import { dotProduct, subtractVectors, align } from '../../../util/math.js'; import { TexelView } from '../../../util/texture/texel_view.js'; import { findFailedPixels } from '../../../util/texture/texture_ok.js'; -export const g = makeTestGroup(GPUTest); +class FragmentBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(FragmentBuiltinTest); const s_deviceToPipelineMap = new WeakMap< GPUDevice, @@ -589,7 +592,7 @@ async function renderFragmentShaderInputsTo4TexturesAndReadbackValues( struct FragmentIn { @builtin(position) position: vec4f, - @location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, +@location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, ${fragInCode} }; @@ -1424,6 +1427,385 @@ g.test('inputs,sample_mask') ); }); -g.test('subgroup_size').unimplemented(); +const kSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 2], + [2, 111], + [35, 2], + [2, 35], + [53, 13], + [13, 53], +] as const; + +/** + * @returns The population count of input. + * + * @param input Treated as an unsigned 32-bit integer + */ +function popcount(input: number): number { + let n = input; + n = n - ((n >> 1) & 0x55555555); + n = (n & 0x33333333) + ((n >> 2) & 0x33333333); + return (((n + (n >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24; +} + +/** + * Checks subgroup_size builtin value consistency. + * + * The builtin subgroup_size is not assumed to be uniform in fragment shaders. + * Therefore, this function checks the value is a power of two within the device + * limits and that the ballot size is less than the stated size. 
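+ *
+ * For example (illustrative numbers only): a texel reporting a builtin size of 16
+ * passes only if popcount(16) === 1, min <= 16 <= max, and the ballot of active
+ * invocations counts at most 16 invocations; otherwise an Error is returned.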
+ * @param data An array of vec4u that contains (per texel): + * * builtin value + * * ballot size + * * comparison to other invocations + * * 0 + * @param format The texture format for data + * @param min The minimum subgroup size from the device + * @param max The maximum subgroup size from the device + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupSizeConsistency( + data: Uint32Array, + format: GPUTextureFormat, + min: number, + max: number, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // Image copies require bytesPerRow to be a multiple of 256. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const builtinSize = data[offset]; + const ballotSize = data[offset + 1]; + const comparison = data[offset + 2]; + if (builtinSize === 0) { + continue; + } + + if (popcount(builtinSize) !== 1) { + return new Error(`Subgroup size '${builtinSize}' is not a power of two`); + } + + if (builtinSize < min) { + return new Error(`Subgroup size '${builtinSize}' is less than minimum '${min}'`); + } + if (max < builtinSize) { + return new Error(`Subgroup size '${builtinSize}' is greater than maximum '${max}'`); + } + + if (builtinSize < ballotSize) { + return new Error(`Inconsistent subgroup ballot size +- icoord: (${row}, ${col}) +- expected: ${builtinSize} +- got: ${ballotSize}`); + } + + if (comparison !== 1) { + return new Error(`Not all invocations in subgroup have same view of the size +- icoord: (${row}, ${col})`); + } + } + } + + return undefined; +} + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen in 2 separate draw calls (half screen each). + * Results are checked for each draw. 
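+ *
+ * Example call (a sketch; the subgroup_size test below does essentially this,
+ * where `minSize`/`maxSize` stand for the device's subgroup size limits):
+ *
+ *   await runSubgroupTest(t, 'rgba32uint', fsShader, width, height,
+ *     data => checkSubgroupSizeConsistency(data, 'rgba32uint', minSize, maxSize, width, height));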
+ * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is a u32 sized data + * @param width The framebuffer width + * @param height The framebuffer height + * @param checker A functor to check the framebuffer values + */ +async function runSubgroupTest( + t: FragmentBuiltinTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-1, -1), vec2(-1, 1), vec2( 1, 1), + vec2(-1, -1), vec2( 1, -1), vec2( 1, 1), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + new Uint32Array([1]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); -g.test('subgroup_invocation_id').unimplemented(); + for (let i = 0; i < 2; i++) { + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3, 1, i); + pass.end(); + t.queue.submit([encoder.finish()]); + + const buffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(buffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); + } +} + +g.test('subgroup_size') + .desc('Tests subgroup_size values') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; + } + const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var for_layout : u32; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_size) sg_size : u32, +) -> 
@location(0) vec4u { + _ = for_layout; + + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Do all invocations in the subgroup see the same subgroup size? + let firstSize = subgroupBroadcast(sg_size, 0); + let compareBallot = countOneBits(subgroupBallot(firstSize == sg_size)); + let compareSize = compareBallot.x + compareBallot.y + compareBallot.z + compareBallot.w; + let sameSize = select(0u, 1u, compareSize == ballotSize); + + return vec4u(sg_size, ballotSize, sameSize, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupSizeConsistency( + data, + t.params.format, + minSubgroupSize, + maxSubgroupSize, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +/** + * Checks subgroup_invocation_id value consistency + * + * Very little uniformity is expected for subgroup_invocation_id. + * This function checks that all ids are less than the subgroup size + * and no id is repeated. + * @param data An array of vec4u that contains (per texel): + * * subgroup_invocation_id + * * ballot size + * * non-zero ID unique to each subgroup + * * 0 + * @param format The texture format of data + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupInvocationIdConsistency( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const mappings = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const id = data[offset]; + const size = data[offset + 1]; + const repId = data[offset + 2]; + + if (repId === 0) { + continue; + } + + if (size < id) { + return new Error( + `Invocation id '${id}' is greater than subgroup size '${size}' for (${row}, ${col})` + ); + } + + let v = mappings.get(repId) ?? 0n; + const mask = 1n << BigInt(id); + if ((mask & v) !== 0n) { + return new Error(`Multiple invocations with id '${id}' in subgroup '${repId}'`); + } + v |= mask; + mappings.set(repId, v); + } + } + + return undefined; +} + +g.test('subgroup_invocation_id') + .desc('Tests subgroup_invocation_id built-in value') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var counter : atomic; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) sg_size : u32, +) -> @location(0) vec4u { + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Generate representative id for this subgroup. 
+ var repId = atomicAdd(&counter, 1); + repId = subgroupBroadcast(repId, 0); + + return vec4u(id, ballotSize, repId, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupInvocationIdConsistency( + data, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts new file mode 100644 index 000000000000..baf5c98326a8 --- /dev/null +++ b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts @@ -0,0 +1,150 @@ +export const description = `Test vertex shader builtin variables + +* test builtin(clip_distances) +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; + +class VertexBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(VertexBuiltinTest); + +g.test('outputs,clip_distances') + .desc( + ` + Test vertex shader builtin(clip_distances) values. + + In the tests, we draw a square with two triangles (top-right and bottom left), whose vertices + have different clip distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + 1. The clip distances values of the pixels in the top-left region should be less than 0 so these + pixels will all be invisible + 2. The clip distances values of the pixels on the top-right-to-bottom-left diagonal line should + be equal to 0 + 3. The clip distances values of the pixels in the bottom-right region should be greater than 0 + + -1 - - - - - 0 + | \\ x x + | \\ x x x + | \\ x x x + | x x\\ x x + | x x x x\\ x + 0 x x x x x 1 + ` + ) + .params(u => u.combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(t => { + const { clipDistances } = t.params; + + // Draw two triangles (top-right and bottom left) into Red, whose vertices have different clip + // distances values. 
(Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + const code = ` + enable clip_distances; + const kClipDistancesSize = ${clipDistances}; + struct VertexOutputs { + @builtin(position) position : vec4f, + @builtin(clip_distances) clipDistances : array, + } + @vertex + fn vsMain(@builtin(vertex_index) vertexIndex : u32) -> VertexOutputs { + var posAndClipDistances = array( + vec3f(-1.0, 1.0, -1.0), + vec3f( 1.0, -1.0, 1.0), + vec3f( 1.0, 1.0, 0.0), + vec3f(-1.0, -1.0, 0.0), + vec3f( 1.0, -1.0, 1.0), + vec3f(-1.0, 1.0, -1.0)); + var vertexOutput : VertexOutputs; + vertexOutput.position = vec4f(posAndClipDistances[vertexIndex].xy, 0.0, 1.0); + vertexOutput.clipDistances[kClipDistancesSize - 1] = posAndClipDistances[vertexIndex].z; + return vertexOutput; + } + @fragment + fn fsMain() -> @location(0) vec4f { + return vec4f(1.0, 0.0, 0.0, 1.0); + }`; + const module = t.device.createShaderModule({ code }); + const renderPipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }); + + const kSize = 7; + const outputTexture = t.createTextureTracked({ + format: 'rgba8unorm', + size: [kSize, kSize, 1] as const, + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + + // Clear outputTexture to Green + const commandEncoder = t.device.createCommandEncoder(); + const renderPassEncoder = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: outputTexture.createView(), + loadOp: 'clear', + clearValue: { r: 0.0, g: 1.0, b: 0.0, a: 1.0 }, + storeOp: 'store', + }, + ], + }); + renderPassEncoder.setPipeline(renderPipeline); + renderPassEncoder.draw(6); + renderPassEncoder.end(); + + const kBytesPerRow = 256; + const kBytesPerPixel = 4; + const outputDataSize = kBytesPerRow * (kSize - 1) + kSize * kBytesPerPixel; + const outputBuffer = t.createBufferTracked({ + size: outputDataSize, + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, + }); + + commandEncoder.copyTextureToBuffer( + { + texture: outputTexture, + }, + { + buffer: outputBuffer, + bytesPerRow: kBytesPerRow, + rowsPerImage: kSize, + }, + [kSize, kSize, 1] + ); + t.queue.submit([commandEncoder.finish()]); + + // The top-left part should be Green and the bottom-right part should be Red + const expectedData = new Uint8Array(outputDataSize); + for (let y = 0; y < kSize; ++y) { + const baseOffset = kBytesPerRow * y; + for (let x = 0; x < kSize; ++x) { + const lastRed = kSize - y - 1; + for (let i = 0; i < lastRed; ++i) { + expectedData[baseOffset + i * 4] = 0; + expectedData[baseOffset + i * 4 + 1] = 255; + expectedData[baseOffset + i * 4 + 2] = 0; + expectedData[baseOffset + i * 4 + 3] = 255; + } + for (let j = lastRed; j < kSize; ++j) { + expectedData[baseOffset + j * 4] = 255; + expectedData[baseOffset + j * 4 + 1] = 0; + expectedData[baseOffset + j * 4 + 2] = 0; + expectedData[baseOffset + j * 4 + 3] = 255; + } + } + } + t.expectGPUBufferValuesEqual(outputBuffer, expectedData); + }); diff --git a/src/webgpu/shader/execution/statement/phony.spec.ts b/src/webgpu/shader/execution/statement/phony.spec.ts index 1f28d040f2d8..309d8848523d 100644 --- a/src/webgpu/shader/execution/statement/phony.spec.ts +++ b/src/webgpu/shader/execution/statement/phony.spec.ts @@ -88,6 +88,10 @@ const kTests = { src: `_ = put(42i);`, values: [42, 0], }, + call_in_subexpr: { + src: `_ = put(42i) + 1;`, + values: [42, 0], + }, nested_call: { src: `_ = put(put(42)+1);`, values: [42, 43, 0], diff --git 
a/src/webgpu/shader/validation/decl/var.spec.ts b/src/webgpu/shader/validation/decl/var.spec.ts index f9e15bd6e2a6..1abf8bcf4fdb 100644 --- a/src/webgpu/shader/validation/decl/var.spec.ts +++ b/src/webgpu/shader/validation/decl/var.spec.ts @@ -749,7 +749,8 @@ g.test('var_access_mode_bad_other_template_contents') .fn(t => { const prog = `@group(0) @binding(0) var<${t.params.prefix}${t.params.accessMode}${t.params.suffix}> x: i32;`; - const ok = t.params.prefix === 'storage,' && t.params.suffix === ''; + const ok = + t.params.prefix === 'storage,' && (t.params.suffix === '' || t.params.suffix === ','); t.expectCompileResult(ok, prog); }); diff --git a/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts new file mode 100644 index 000000000000..30f521e54944 --- /dev/null +++ b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts @@ -0,0 +1,264 @@ +export const description = ` +Validation tests for short-circuiting && and || expressions. +`; + +import { makeTestGroup } from '../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../common/util/data_tables.js'; +import { + kAllScalarsAndVectors, + ScalarType, + scalarTypeOf, + Type, +} from '../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +// A list of scalar and vector types. +const kScalarAndVectorTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('scalar_vector') + .desc( + ` + Validates that scalar and vector short-circuiting operators are only accepted for scalar booleans. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('lhs', keysOf(kScalarAndVectorTypes)) + .combine( + 'rhs', + // Skip vec3 and vec4 on the RHS to keep the number of subcases down. + keysOf(kScalarAndVectorTypes).filter( + value => !(value.startsWith('vec3') || value.startsWith('vec4')) + ) + ) + .beginSubcases() + ) + .beforeAllSubcases(t => { + if ( + scalarTypeOf(kScalarAndVectorTypes[t.params.lhs]) === Type.f16 || + scalarTypeOf(kScalarAndVectorTypes[t.params.rhs]) === Type.f16 + ) { + t.selectDeviceOrSkipTestCase('shader-f16'); + } + }) + .fn(t => { + const lhs = kScalarAndVectorTypes[t.params.lhs]; + const rhs = kScalarAndVectorTypes[t.params.rhs]; + const lhsElement = scalarTypeOf(lhs); + const rhsElement = scalarTypeOf(rhs); + const hasF16 = lhsElement === Type.f16 || rhsElement === Type.f16; + const code = ` +${hasF16 ? 'enable f16;' : ''} +const lhs = ${lhs.create(0).wgsl()}; +const rhs = ${rhs.create(0).wgsl()}; +const foo = lhs ${t.params.op} rhs; +`; + + // Determine if the types are compatible. + let valid = false; + if (lhs instanceof ScalarType && rhs instanceof ScalarType) { + valid = lhsElement === Type.bool && rhsElement === Type.bool; + } + + t.expectCompileResult(valid, code); + }); + +interface InvalidTypeConfig { + // An expression that produces a value of the target type. + expr: string; + // A function that converts an expression of the target type into a valid boolean operand. 
+ control: (x: string) => string; +} +const kInvalidTypes: Record = { + mat2x2f: { + expr: 'm', + control: e => `bool(${e}[0][0])`, + }, + + array: { + expr: 'arr', + control: e => `${e}[0]`, + }, + + ptr: { + expr: '(&b)', + control: e => `*${e}`, + }, + + atomic: { + expr: 'a', + control: e => `bool(atomicLoad(&${e}))`, + }, + + texture: { + expr: 't', + control: e => `bool(textureLoad(${e}, vec2(), 0).x)`, + }, + + sampler: { + expr: 's', + control: e => `bool(textureSampleLevel(t, ${e}, vec2(), 0).x)`, + }, + + struct: { + expr: 'str', + control: e => `${e}.b`, + }, +}; + +g.test('invalid_types') + .desc( + ` + Validates that short-circuiting expressions are never accepted for non-scalar and non-vector types. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('type', keysOf(kInvalidTypes)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const type = kInvalidTypes[t.params.type]; + const expr = t.params.control ? type.control(type.expr) : type.expr; + const code = ` +@group(0) @binding(0) var t : texture_2d; +@group(0) @binding(1) var s : sampler; +@group(0) @binding(2) var a : atomic; + +struct S { b : bool } + +var b : bool; +var m : mat2x2f; +var arr : array; +var str : S; + +@compute @workgroup_size(1) +fn main() { + let foo = ${expr} ${t.params.op} ${expr}; +} +`; + + t.expectCompileResult(t.params.control, code); + }); + +// A map from operator to the value of the LHS that will cause short-circuiting. +const kLhsForShortCircuit: Record = { + '&&': false, + '||': true, +}; + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. +const kInvalidRhsExpressions: Record = { + overflow: 'i32(1< + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +const thirty_one = 31u; +const zero_i32 = 0i; +const one_f32 = 1.0f; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +} +`; + + t.expectCompileResult(t.params.short_circuit, code); + }); + +g.test('invalid_rhs_override') + .desc( + ` + Validates that a short-circuiting expression with an override-expression LHS guards the evaluation of its RHS expression. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +override cond : bool; +override zero_i32 = 0i; +override one_f32 = 1.0f; +override thirty_one = 31u; +override foo = cond ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +`; + + const constants: Record = {}; + constants['cond'] = lhs ? 1 : 0; + t.expectPipelineResult({ + expectedResult: t.params.short_circuit, + code, + constants, + reference: ['foo'], + }); + }); + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. +// The control case will use `value = 10`, the failure case will use `value = 1`. 
+const kInvalidArrayCounts: Record = { + negative: 'value - 2', + sqrt_neg1: 'u32(sqrt(value - 2))', + nested: '10 + array()[0]', +}; + +g.test('invalid_array_count_on_rhs') + .desc( + ` + Validates that an invalid array count expression is not guarded by a short-circuiting expression. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidArrayCounts)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const lhs = t.params.op === '&&' ? 'false' : 'true'; + const code = ` +const value = ${t.params.control ? '10' : '1'}; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} array()[0]; +} +`; + + t.expectCompileResult(t.params.control, code); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts index 1ac752a3bfa9..ff0114097f90 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts @@ -127,6 +127,8 @@ Validates that low <= high. const scalar = scalarTypeOf(ty); return scalar !== Type.abstractInt && scalar !== Type.abstractFloat; }) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValuesTypes[t.params.type]; @@ -176,7 +178,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -187,6 +192,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts index 80fe7ccaca5e..32abc477ee8f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts @@ -98,6 +98,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -138,7 +140,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -149,6 +154,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts index 57644ad36fb4..b302bfd14677 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts @@ -119,6 +119,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -160,7 +162,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -171,6 +176,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts index 826354d1ff08..55a702d71f0a 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts @@ -143,6 +143,8 @@ g.test('partial_values') cases.push({ value: bias + 2 }); return cases; }) + // in_shader: Is the functino call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValidArgumentTypesA[t.params.typeA]; @@ -179,7 +181,7 @@ fn foo() { const bias = biasForType(scalarTypeOf(tyA)); const error = t.params.value > bias + 1; const shader_error = error && t.params.stage === 'constant'; - const pipeline_error = error && t.params.stage === 'override'; + const pipeline_error = t.params.in_shader && error && t.params.stage === 'override'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -189,6 +191,7 @@ fn foo() { code: wgsl, constants, reference: ['o_b'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts index 28e1d9cdc61b..bed18020632d 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts @@ -12,7 +12,13 @@ import { scalarTypeOf, ScalarType, } from '../../../../../util/conversion.js'; -import { QuantizeFunc, quantizeToF16, quantizeToF32 } from '../../../../../util/math.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -37,6 +43,17 @@ function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { } } +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -73,6 +90,11 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec expectedResult = false; } + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). + const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + t.skipIf(isSubnormalFn(vv) || isSubnormalFn(dp) || isSubnormalFn(len)); + validateConstOrOverrideBuiltinEval( t, builtin, diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts new file mode 100644 index 000000000000..6988f17b9ede --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts @@ -0,0 +1,286 @@ +export const description = ` +Validation tests for quadBroadcast +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + isConvertible, + Type, + elementTypeOf, + kAllScalarsAndVectors, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = quadBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = quadBroadcast(0, 0); +}`, + override: ` +enable subgroups; +override o = quadBroadcast(0, 0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = quadBroadcast(0, 0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(${type.create(0).wgsl()}, 0); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = quadBroadcast(${dataType.create(0).wgsl()}, 0); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType 
=== retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('id_type') + .desc('Validates id parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + const wgsl = ` +enable subgroups; +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(0, ${type.create(0).wgsl()}); +}`; + + const expect = isConvertible(type, Type.u32) || isConvertible(type, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +override override_decl : u32; +var var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = quadBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = quadBroadcast(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts new file mode 100644 index 000000000000..3812ba057ed6 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for quadSwapX, quadSwapY, and quadSwapDiagonal. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ?
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts index 51cf9553785c..387340f80e9f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts @@ -2,10 +2,21 @@ const builtin = 'refract'; export const description = ` Validation tests for the ${builtin}() builtin. `; - import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; -import { Type, kConvertableToFloatVectors, scalarTypeOf } from '../../../../../util/conversion.js'; +import { + Type, + kConvertableToFloatVectors, + scalarTypeOf, + ScalarType, +} from '../../../../../util/conversion.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -20,6 +31,28 @@ export const g = makeTestGroup(ShaderValidationTest); const kValidArgumentTypes = objectsToRecord(kConvertableToFloatVectors); +function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { + switch (type) { + case Type.f32: + return quantizeToF32; + case Type.f16: + return quantizeToF16; + default: + return (v: number) => v; + } +} + +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -64,6 +97,17 @@ where a the calculations result in a non-representable value for the given type. const c2_one_minus_b_dot_a_2 = vCheck.checkedResult(c2 * one_minus_b_dot_a_2); const k = vCheck.checkedResult(1.0 - c2_one_minus_b_dot_a_2); + const quantizeFn = quantizeFunctionForScalarType(scalarType); + const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). 
+ t.skipIf( + isSubnormalFn(quantizeFn(b_dot_a)) || + isSubnormalFn(quantizeFn(b_dot_a_2)) || + isSubnormalFn(quantizeFn(c2)) || + isSubnormalFn(quantizeFn(k)) + ); + if (k >= 0) { // If the k is near zero it may fail on some implementations which implement sqrt as // 1/inversesqrt, so skip the test. diff --git a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts index 5a5a28fc7362..2879055ab216 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts @@ -51,16 +51,15 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec .fn(t => { const type = kValuesTypes[t.params.type]; - // We expect to fail if low >= high as it results in a DBZ - const expectedResult = t.params.value1 >= t.params.value2; + // We expect to fail if low >= high. + const expectedResult = t.params.value1 < t.params.value2; validateConstOrOverrideBuiltinEval( t, builtin, expectedResult, [type.create(t.params.value1), type.create(t.params.value2), type.create(0)], - t.params.stage, - /* returnType */ concreteTypeOf(type, [Type.f32]) + t.params.stage ); }); @@ -81,6 +80,8 @@ g.test('partial_eval_errors') .beginSubcases() .expand('low', u => [0, 10]) .expand('high', u => [0, 10]) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { if (scalarTypeOf(kValuesTypes[t.params.type]) === Type.f16) { @@ -130,7 +131,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -141,6 +145,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); @@ -159,10 +164,11 @@ Validates that scalar and vector arguments are rejected by ${builtin}() if not f }) .fn(t => { const type = kArgumentTypes[t.params.type]; + const expectedResult = isConvertibleToFloatType(elementTypeOf(type)); validateConstOrOverrideBuiltinEval( t, builtin, - /* expectedResult */ isConvertibleToFloatType(elementTypeOf(type)), + expectedResult, [type.create(0), type.create(1), type.create(2)], 'constant', /* returnType */ concreteTypeOf(type, [Type.f32]) @@ -344,7 +350,7 @@ g.test('early_eval_errors') t, builtin, /* expectedResult */ t.params.low < t.params.high, - [f32(0), f32(t.params.low), f32(t.params.high)], + [f32(t.params.low), f32(t.params.high), f32(0)], t.params.stage ); }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..4f2a2af52197 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupAdd and subgroupExclusiveAdd +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +const kStages: Record string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + 
+g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + b : u32, +} + +var<private> priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts new file mode 100644 index 000000000000..eaee33e62cff --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts @@ -0,0 +1,186 @@ +export const description = ` +Validation tests for subgroupAny and subgroupAll. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAny', 'subgroupAll'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(true); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = select(0, 1, ${op}(true));`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(true); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ?
'_ = ' : ''}${t.params.op}(false); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('type', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = ${t.params.op}(true); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(true); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts index afbe33e93c56..5f53847be25c 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts @@ -9,6 +9,22 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kStages: Record = { constant: ` enable subgroups; @@ -38,6 +54,23 @@ g.test('early_eval') t.expectCompileResult(t.params.stage === 'runtime', code); }); +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); g.test('data_type') @@ -69,7 +102,7 @@ fn main() { }); g.test('return_type') - .desc('Validates data parameter type') + .desc('Validates return type') .params(u => u.combine('type', keysOf(kArgumentTypes)).filter(t => { const type = kArgumentTypes[t.type]; diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts new file mode 100644 index 000000000000..ca0dfb6fd719 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts @@ -0,0 +1,204 @@ +export const description = ` +Validation tests for subgroupAnd, subgroupOr, and subgroupXor. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + const expect = isConvertible(eleType, Type.u32) || isConvertible(eleType, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type 
before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && (eleRetType === Type.i32 || eleRetType === Type.u32); + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts index a71b145092c8..fd76cd419b7f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts @@ -14,6 +14,44 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record = { @@ -156,6 +194,58 @@ fn main() { t.expectCompileResult(expect, wgsl); }); +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +override override_decl : u32; +var var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = subgroupBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + g.test('stage') .desc('Validates it is only usable in correct stage') .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts new file mode 100644 index 000000000000..4525b6b97ef8 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts @@ -0,0 +1,210 @@ +export const description = ` +Validation tests for subgroupBroadcastFirst +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupBroadcastFirst(0); +}`, + override: ` +enable subgroups; +override o = subgroupBroadcastFirst(0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupBroadcastFirst(0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupBroadcastFirst(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = subgroupBroadcastFirst(${dataType.create(0).wgsl()}); +}`; + + const expect = 
elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupBroadcastFirst(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts new file mode 100644 index 000000000000..5637860c59ce --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts @@ -0,0 +1,175 @@ +export const description = ` +Validation tests for subgroupElect. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupElect(); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupElect(); +}`, + override: ` +enable subgroups +override o = select(0, 1, subgroupElect());`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupElect(); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}subgroupElect(); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates there are no valid data parameters') + .params(u => u.combine('type', keysOf(kTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupElect(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(false, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u.combine('type', keysOf(kTypes)).filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = subgroupElect(); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupElect(); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts new file mode 100644 index 000000000000..84c1860019ee --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for subgroupMin and subgroupMax. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupMin', 'subgroupMax'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..0b50d4c9df2d --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +const kStages: Record string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + 
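+// The *Mul builtins accept only numeric scalar and vector data, so each of
+// these composite, pointer, or builtin-result-struct arguments is expected
+// to fail compilation.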
+g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + b : u32, +} + +var priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts new file mode 100644 index 000000000000..62ffb5af36dd --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts @@ -0,0 +1,262 @@ +export const description = ` +Validation tests for subgroupShuffle, subgroupShuffleXor, subgroupShuffleUp, and subgroupShuffleDown. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = [ + 'subgroupShuffle', + 'subgroupShuffleXor', + 'subgroupShuffleUp', + 'subgroupShuffleDown', +] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0, 0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0, 0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0, 0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}, 0); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}, 0); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
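+    // For example, an abstract-float argument concretizes the call to f32, so
+    // declaring the result as f16 must fail even though abstract-float on its
+    // own is convertible to f16.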
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('param2_type') + .desc('Validates shuffle parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(0, ${type.create(0).wgsl()}); +}`; + + const expect = + isConvertible(type, Type.u32) || (type === Type.i32 && t.params.op === 'subgroupShuffle'); + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts index 85bed5228482..d162ba3286b9 100644 --- a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts @@ -275,7 +275,7 @@ g.test('underflow_f16') let rhs = `mat${t.params.c}x${t.params.r}h(`; for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { - lhs += `${kValue.f32.negative.min / 2},`; + lhs += `${kValue.f16.negative.min / 2},`; rhs += `${t.params.rhs},`; } } diff --git a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts index e76e40265e09..a3a5d368dc2d 100644 --- a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts @@ -631,7 +631,7 @@ g.test('overflow_mat_f16_internal') for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { lhs += `${t.params.lhs},`; - rhs += `1`; + rhs += `1,`; } } rhs += ')'; diff --git a/src/webgpu/shader/validation/extension/clip_distances.spec.ts b/src/webgpu/shader/validation/extension/clip_distances.spec.ts new file mode 100644 index 000000000000..88957d8e8e62 --- /dev/null +++ b/src/webgpu/shader/validation/extension/clip_distances.spec.ts @@ -0,0 +1,43 @@ +export const description = ` +Validation tests for the 
clip_distances extension +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { ShaderValidationTest } from '../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('use_clip_distances_requires_extension_enabled') + .desc( + `Checks that the clip_distances built-in variable is only allowed with the WGSL extension + clip_distances enabled in shader and the WebGPU extension clip-distances supported on the + device.` + ) + .params(u => + u.combine('requireExtension', [true, false]).combine('enableExtension', [true, false]) + ) + .beforeAllSubcases(t => { + if (t.params.requireExtension) { + t.selectDeviceOrSkipTestCase({ requiredFeatures: ['clip-distances'] }); + } + }) + .fn(t => { + const { requireExtension, enableExtension } = t.params; + + t.expectCompileResult( + requireExtension && enableExtension, + ` + ${enableExtension ? 'enable clip_distances;' : ''} + struct VertexOut { + @builtin(clip_distances) my_clip_distances : array, + @builtin(position) my_position : vec4f, + } + @vertex fn main() -> VertexOut { + var output : VertexOut; + output.my_clip_distances[0] = 1.0; + output.my_position = vec4f(0.0, 0.0, 0.0, 1.0); + return output; + } + ` + ); + }); diff --git a/src/webgpu/shader/validation/parse/identifiers.spec.ts b/src/webgpu/shader/validation/parse/identifiers.spec.ts index 0dd429d0a72c..4a7ec70120ff 100644 --- a/src/webgpu/shader/validation/parse/identifiers.spec.ts +++ b/src/webgpu/shader/validation/parse/identifiers.spec.ts @@ -199,6 +199,8 @@ const kInvalidIdentifiers = new Set([ 'noexcept', 'noinline', 'nointerpolation', + 'non_coherent', + 'noncoherent', 'noperspective', 'null', 'nullptr', diff --git a/src/webgpu/shader/validation/shader_io/builtins.spec.ts b/src/webgpu/shader/validation/shader_io/builtins.spec.ts index 85a30fa0ec60..3d01f8f23a3e 100644 --- a/src/webgpu/shader/validation/shader_io/builtins.spec.ts +++ b/src/webgpu/shader/validation/shader_io/builtins.spec.ts @@ -10,7 +10,7 @@ export const g = makeTestGroup(ShaderValidationTest); // List of all built-in variables and their stage, in|out usage, and type. // Taken from table in Section 15: -// https://www.w3.org/TR/2021/WD-WGSL-20211013/#builtin-variables +// https://www.w3.org/TR/WGSL/#builtin-inputs-outputs export const kBuiltins = [ { name: 'vertex_index', stage: 'vertex', io: 'in', type: 'u32' }, { name: 'instance_index', stage: 'vertex', io: 'in', type: 'u32' }, @@ -30,6 +30,14 @@ export const kBuiltins = [ { name: 'subgroup_size', stage: 'compute', io: 'in', type: 'u32' }, { name: 'subgroup_invocation_id', stage: 'fragment', io: 'in', type: 'u32' }, { name: 'subgroup_size', stage: 'fragment', io: 'in', type: 'u32' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, ] as const; // List of types to test against. 
@@ -64,7 +72,15 @@ const kTestTypes = [ 'array', 'array', 'array', + 'array', + 'array', + 'array', 'array', + 'array', + 'array', + 'array', + 'array', + 'array', 'MyStruct', ] as const; @@ -87,7 +103,16 @@ g.test('stage_inout') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.target_stage === 'vertex' && + t.params.target_io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output when the vertex output is not a struct' + ); }) .fn(t => { const code = generateShader({ @@ -117,9 +142,9 @@ g.test('type') .params(u => u .combineWithParams(kBuiltins) + .combine('use_struct', [true, false] as const) .beginSubcases() .combine('target_type', kTestTypes) - .combine('use_struct', [true, false] as const) ) .beforeAllSubcases(t => { t.skipIf( @@ -128,7 +153,16 @@ g.test('type') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.stage === 'vertex' && + t.params.io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output' + ); }) .fn(t => { let code = ''; @@ -297,14 +331,30 @@ g.test('reuse_builtin_name') u .combineWithParams(kBuiltins) .combine('use', ['alias', 'struct', 'function', 'module-var', 'function-var']) + .combine('enable_extension', [true, false]) + .unless( + t => t.enable_extension && !(t.name.includes('subgroup') || t.name === 'clip_distances') + ) ) .beforeAllSubcases(t => { + if (!t.params.enable_extension) { + return; + } if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } }) .fn(t => { let code = ''; + if (t.params.enable_extension) { + if (t.params.name.includes('subgroups')) { + code += 'enable subgroup;\n'; + } else if (t.params.name === 'clip_distances') { + code += 'enable clip_distances;\n'; + } + } if (t.params.use === 'alias') { code += `alias ${t.params.name} = i32;`; } else if (t.params.use === `struct`) { diff --git a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts index 933093e16f0f..b716093144b0 100644 --- a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts +++ b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts @@ -9,15 +9,11 @@ import { generateShader } from './util.js'; export const g = makeTestGroup(ShaderValidationTest); // List of valid interpolation attributes. -const kValidCompatInterpolationAttributes = new Set([ +const kValidInterpolationAttributes = new Set([ '', - '@interpolate(flat, either)', '@interpolate(perspective)', '@interpolate(perspective, center)', '@interpolate(perspective, centroid)', -]); -const kValidInterpolationAttributes = new Set([ - ...kValidCompatInterpolationAttributes, '@interpolate(flat)', '@interpolate(flat, first)', '@interpolate(flat, either)', @@ -83,10 +79,7 @@ g.test('type_and_sampling') io: t.params.io, use_struct: t.params.use_struct, }); - const validInterpolationAttributes = t.isCompatibility - ? 
kValidCompatInterpolationAttributes - : kValidInterpolationAttributes; - t.expectCompileResult(validInterpolationAttributes.has(interpolate), code); + t.expectCompileResult(kValidInterpolationAttributes.has(interpolate), code); }); g.test('require_location') @@ -140,9 +133,7 @@ g.test('integral_types') use_struct: t.params.use_struct, }); - const expectSuccess = t.isCompatibility - ? t.params.attribute === '@interpolate(flat, either)' - : t.params.attribute.startsWith('@interpolate(flat'); + const expectSuccess = t.params.attribute.startsWith('@interpolate(flat'); t.expectCompileResult(expectSuccess, code); }); @@ -160,7 +151,7 @@ g.test('duplicate') t.expectCompileResult(t.params.attr === '', code); }); -const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPass?: boolean } } = { +const kValidationTests: { [key: string]: { src: string; pass: boolean } } = { valid: { src: `@interpolate(perspective)`, pass: true, @@ -172,7 +163,6 @@ const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPas trailing_comma_one_arg: { src: `@interpolate(flat,)`, pass: true, - compatPass: false, }, trailing_comma_two_arg: { src: `@interpolate(perspective, center,)`, @@ -230,9 +220,6 @@ g.test('interpolation_validation') @builtin(position) vec4 { return vec4f(0); }`; - const expectSuccess = - kValidationTests[t.params.attr].pass && - (t.isCompatibility ? kValidationTests[t.params.attr].compatPass ?? true : true); - + const expectSuccess = kValidationTests[t.params.attr].pass; t.expectCompileResult(expectSuccess, code); }); diff --git a/src/webgpu/shader/validation/shader_io/util.ts b/src/webgpu/shader/validation/shader_io/util.ts index d115d79328b4..b71fd2aab42a 100644 --- a/src/webgpu/shader/validation/shader_io/util.ts +++ b/src/webgpu/shader/validation/shader_io/util.ts @@ -27,6 +27,9 @@ export function generateShader({ if (attribute.includes('subgroup')) { code += 'enable subgroups;\n'; } + if (attribute.includes('clip_distances')) { + code += 'enable clip_distances;\n'; + } if (use_struct) { // Generate a struct that wraps the entry point IO variable. diff --git a/src/webgpu/shader/validation/shader_validation_test.ts b/src/webgpu/shader/validation/shader_validation_test.ts index 6a4cae331766..5db47bd586ba 100644 --- a/src/webgpu/shader/validation/shader_validation_test.ts +++ b/src/webgpu/shader/validation/shader_validation_test.ts @@ -119,9 +119,14 @@ export class ShaderValidationTest extends GPUTest { constants?: Record; // List of additional module-scope variable the entrypoint needs to reference reference?: string[]; + // List of additional statements to insert in the entry point. 
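+      // Each entry is emitted verbatim as a statement in the generated entry
+      // point body, e.g. `'_ = 1;'`.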
+ statements?: string[]; }) { const phonies: Array = []; + if (args.statements !== undefined) { + phonies.push(...args.statements); + } if (args.constants !== undefined) { phonies.push(...keysOf(args.constants).map(c => `_ = ${c};`)); } diff --git a/src/webgpu/shader/validation/types/textures.spec.ts b/src/webgpu/shader/validation/types/textures.spec.ts index 7b8f1748c113..f619877e2bc1 100644 --- a/src/webgpu/shader/validation/types/textures.spec.ts +++ b/src/webgpu/shader/validation/types/textures.spec.ts @@ -120,7 +120,7 @@ Besides, the shader compilation should always pass regardless of whether the for const { format, access, comma } = t.params; // bgra8unorm is considered a valid storage format at shader compilation stage const isFormatValid = - isTextureFormatUsableAsStorageFormat(format, t.isCompatibility) || format === 'bgra8unorm'; + isTextureFormatUsableAsStorageFormat(format, false) || format === 'bgra8unorm'; const isAccessValid = kAccessModes.includes(access); const wgsl = `@group(0) @binding(0) var tex: texture_storage_2d<${format}, ${access}${comma}>;`; t.expectCompileResult(isFormatValid && isAccessValid, wgsl); diff --git a/src/webgpu/util/math.ts b/src/webgpu/util/math.ts index 20d7818df65d..d5ca2b41320e 100644 --- a/src/webgpu/util/math.ts +++ b/src/webgpu/util/math.ts @@ -961,6 +961,17 @@ export function scalarF32Range( counts.neg_norm = counts.neg_norm === undefined ? counts.pos_norm : counts.neg_norm; counts.neg_sub = counts.neg_sub === undefined ? counts.pos_sub : counts.neg_sub; + let special_pos: number[] = []; + // The first interior point for 'pos_norm' is at 3. Because we have two special values we start allowing these + // special values as soon as they will fit as interior values. + if (counts.pos_norm >= 4) { + special_pos = [ + // Largest float as signed integer + 0x4effffff, + // Largest float as unsigned integer + 0x4f7fffff, + ]; + } // Generating bit fields first and then converting to f32, so that the spread across the possible f32 values is more // even. Generating against the bounds of f32 values directly results in the values being extremely biased towards the // extremes, since they are so much larger. 
@@ -980,7 +991,14 @@ export function scalarF32Range( kBit.f32.positive.subnormal.max, counts.pos_sub ), - ...linearRange(kBit.f32.positive.min, kBit.f32.positive.max, counts.pos_norm), + ...[ + ...linearRange( + kBit.f32.positive.min, + kBit.f32.positive.max, + counts.pos_norm - special_pos.length + ), + ...special_pos, + ].sort((n1, n2) => n1 - n2), ].map(Math.trunc); return bit_fields.map(reinterpretU32AsF32); } diff --git a/src/webgpu/util/texture.ts b/src/webgpu/util/texture.ts index badce71baa34..20e99fdfad4d 100644 --- a/src/webgpu/util/texture.ts +++ b/src/webgpu/util/texture.ts @@ -17,6 +17,7 @@ const kLoadValueFromStorageInfo: Partial<{ texelType: string; unpackWGSL: string; useFragDepth?: boolean; + discardWithStencil?: boolean; }; }> = { r8unorm: { @@ -233,17 +234,27 @@ const kLoadValueFromStorageInfo: Partial<{ `, useFragDepth: true, }, + stencil8: { + storageType: 'u32', + texelType: 'vec4u', + unpackWGSL: ` + return vec4u(unpack4xU8(src[byteOffset / 4])[byteOffset % 4], 123, 123, 123) + `, + discardWithStencil: true, + }, }; function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { const info = kLoadValueFromStorageInfo[format]; assert(!!info); - const { storageType, texelType, unpackWGSL, useFragDepth } = info; + const { storageType, texelType, unpackWGSL, useFragDepth, discardWithStencil } = info; const [depthDecl, depthCode] = useFragDepth ? ['@builtin(frag_depth) d: f32,', 'fs.d = fs.v[0];'] : ['', '']; + const stencilCode = discardWithStencil ? 'if ((fs.v.r & vin.stencilMask) == 0) { discard; }' : ''; + return ` struct Uniforms { numTexelRows: u32, @@ -255,9 +266,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { struct VSOutput { @builtin(position) pos: vec4f, @location(0) @interpolate(flat, either) sampleIndex: u32, + @location(1) @interpolate(flat, either) stencilMask: u32, }; - @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> VSOutput { + @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { let points = array( vec2f(0, 0), vec2f(1, 0), vec2f(0, 1), vec2f(1, 1), ); @@ -266,7 +278,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { let rowOffset = f32(sampleRow) / numSampleRows; let rowMult = 1.0 / numSampleRows; let p = (points[vNdx % 4] * vec2f(1, rowMult) + vec2f(0, rowOffset)) * 2.0 - 1.0; - return VSOutput(vec4f(p, 0, 1), uni.sampleCount - sampleRow % uni.sampleCount - 1); + return VSOutput( + vec4f(p, 0, 1), + uni.sampleCount - sampleRow % uni.sampleCount - 1, + 1u << iNdx); } @group(0) @binding(0) var uni: Uniforms; @@ -289,6 +304,7 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { var fs: FSOutput; fs.v = unpack(byteOffset); ${depthCode} + ${stencilCode} return fs; } `; @@ -312,114 +328,158 @@ function copyBufferToTextureViaRender( const msInfo = kLoadValueFromStorageInfo[format]; assert(!!msInfo); - const { useFragDepth } = msInfo; + const { useFragDepth, discardWithStencil } = msInfo; const { device } = t; - const code = getCopyBufferToTextureViaRenderCode(format); - const id = JSON.stringify({ format, useFragDepth, sampleCount, code }); - const pipelines = - s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); - s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); - let pipeline = pipelines.get(id); - if (!pipeline) { - const module = device.createShaderModule({ code }); - pipeline = device.createRenderPipeline({ - layout: 'auto', - vertex: { module }, - ...(useFragDepth - ? 
{ - fragment: { - module, - targets: [], - }, - depthStencil: { - depthWriteEnabled: true, - depthCompare: 'always', - format, - }, - } - : { - fragment: { - module, - targets: [{ format }], - }, - }), - primitive: { - topology: 'triangle-strip', - }, - ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + const numBlits = discardWithStencil ? 8 : 1; + for (let blitCount = 0; blitCount < numBlits; ++blitCount) { + const code = getCopyBufferToTextureViaRenderCode(format); + const stencilWriteMask = 1 << blitCount; + const id = JSON.stringify({ + format, + useFragDepth, + stencilWriteMask, + discardWithStencil, + sampleCount, + code, }); - pipelines.set(id, pipeline); - } + const pipelines = + s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); + s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); + let pipeline = pipelines.get(id); + if (!pipeline) { + const module = device.createShaderModule({ code }); + pipeline = device.createRenderPipeline({ + label: `blitCopyFor-${format}`, + layout: 'auto', + vertex: { module }, + ...(discardWithStencil + ? { + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: false, + depthCompare: 'always', + format, + stencilWriteMask, + stencilFront: { + passOp: 'replace', + }, + }, + } + : useFragDepth + ? { + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: true, + depthCompare: 'always', + format, + }, + } + : { + fragment: { + module, + targets: [{ format }], + }, + }), + primitive: { + topology: 'triangle-strip', + }, + ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + }); + pipelines.set(id, pipeline); + } - const info = kTextureFormatInfo[format]; - const uniforms = new Uint32Array([ - copySize.height, // numTexelRows: u32, - source.bytesPerRow!, // bytesPerRow: u32, - info.bytesPerBlock!, // bytesPerSample: u32, - dest.texture.sampleCount, // sampleCount: u32, - ]); - const uniformBuffer = t.makeBufferWithContents( - uniforms, - GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM - ); - const storageBuffer = t.createBufferTracked({ - size: source.buffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, - }); - encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); - const baseMipLevel = dest.mipLevel; - for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { - const baseArrayLayer = origin.z + l; - const mipLevelCount = 1; - const arrayLayerCount = 1; - const pass = encoder.beginRenderPass( - useFragDepth - ? 
{ - colorAttachments: [], - depthStencilAttachment: { - view: dest.texture.createView({ - baseMipLevel, - baseArrayLayer, - mipLevelCount, - arrayLayerCount, - }), - depthClearValue: 0, - depthLoadOp: 'clear', - depthStoreOp: 'store', - }, - } - : { - colorAttachments: [ - { + const info = kTextureFormatInfo[format]; + const uniforms = new Uint32Array([ + copySize.height, // numTexelRows: u32, + source.bytesPerRow!, // bytesPerRow: u32, + info.bytesPerBlock!, // bytesPerSample: u32, + dest.texture.sampleCount, // sampleCount: u32, + ]); + const uniformBuffer = t.makeBufferWithContents( + uniforms, + GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM + ); + const storageBuffer = t.createBufferTracked({ + size: source.buffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, + }); + encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); + const baseMipLevel = dest.mipLevel; + for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { + const baseArrayLayer = origin.z + l; + const mipLevelCount = 1; + const arrayLayerCount = 1; + const pass = encoder.beginRenderPass( + discardWithStencil + ? { + colorAttachments: [], + depthStencilAttachment: { view: dest.texture.createView({ baseMipLevel, baseArrayLayer, mipLevelCount, arrayLayerCount, }), - loadOp: 'clear', - storeOp: 'store', + stencilClearValue: 0, + stencilLoadOp: 'load', + stencilStoreOp: 'store', }, - ], - } - ); - pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); - pass.setPipeline(pipeline); + } + : useFragDepth + ? { + colorAttachments: [], + depthStencilAttachment: { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + depthClearValue: 0, + depthLoadOp: 'clear', + depthStoreOp: 'store', + }, + } + : { + colorAttachments: [ + { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + loadOp: 'clear', + storeOp: 'store', + }, + ], + } + ); + pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); + pass.setPipeline(pipeline); - const offset = - (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: uniformBuffer } }, - { binding: 1, resource: { buffer: storageBuffer, offset } }, - ], - }); + const offset = + (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; + const bindGroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: uniformBuffer } }, + { binding: 1, resource: { buffer: storageBuffer, offset } }, + ], + }); - pass.setBindGroup(0, bindGroup); - pass.draw(4 * copySize.height * dest.texture.sampleCount); - pass.end(); + pass.setBindGroup(0, bindGroup); + pass.setStencilReference(0xff); + pass.draw(4 * copySize.height * dest.texture.sampleCount, 1, 0, blitCount); + pass.end(); + } } } diff --git a/src/webgpu/util/texture/base.ts b/src/webgpu/util/texture/base.ts index c5c6aaf20579..0bdcb141db2b 100644 --- a/src/webgpu/util/texture/base.ts +++ b/src/webgpu/util/texture/base.ts @@ -239,6 +239,7 @@ export function reifyTextureViewDescriptor( const format = view.format ?? texture.format; const mipLevelCount = view.mipLevelCount ?? texture.mipLevelCount - baseMipLevel; const dimension = view.dimension ?? defaultViewDimensionsForTexture(texture); + const usage = (view.usage ?? 
0) === 0 ? texture.usage : view.usage!; let arrayLayerCount = view.arrayLayerCount; if (arrayLayerCount === undefined) { @@ -255,6 +256,7 @@ export function reifyTextureViewDescriptor( format, dimension, aspect, + usage, baseMipLevel, mipLevelCount, baseArrayLayer, diff --git a/src/webgpu/web_platform/canvas/configure.spec.ts b/src/webgpu/web_platform/canvas/configure.spec.ts index 65b0bc1f9d7b..06e590751b33 100644 --- a/src/webgpu/web_platform/canvas/configure.spec.ts +++ b/src/webgpu/web_platform/canvas/configure.spec.ts @@ -3,7 +3,7 @@ Tests for GPUCanvasContext.configure. TODO: - Test colorSpace -- Test viewFormats +- Test toneMapping `; import { makeTestGroup } from '../../../common/framework/test_group.js'; @@ -42,6 +42,16 @@ g.test('defaults') format: 'rgba8unorm', }); + const configuration = ctx.getConfiguration(); + assert(configuration !== null); + t.expect(configuration.device === t.device); + t.expect(configuration.format === 'rgba8unorm'); + t.expect(configuration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(configuration.viewFormats.length === 0); + t.expect(configuration.colorSpace === 'srgb'); + t.expect(configuration.toneMapping.mode === 'standard'); + t.expect(configuration.alphaMode === 'opaque'); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture.format === 'rgba8unorm'); t.expect(currentTexture.usage === GPUTextureUsage.RENDER_ATTACHMENT); @@ -69,6 +79,9 @@ g.test('device') const ctx = canvas.getContext('webgpu'); assert(ctx instanceof GPUCanvasContext, 'Failed to get WebGPU context from canvas'); + // getConfiguration returns null before configure. + t.expect(ctx.getConfiguration() === null); + // Calling configure without a device should throw a TypeError. t.shouldThrow('TypeError', () => { ctx.configure({ @@ -85,8 +98,20 @@ g.test('device') ctx.configure({ device: t.device, format: 'rgba8unorm', + alphaMode: 'opaque', }); + // getConfiguration will succeed after configure. + const configuration = ctx.getConfiguration(); + assert(configuration !== null); + t.expect(configuration.device === t.device); + t.expect(configuration.format === 'rgba8unorm'); + t.expect(configuration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(configuration.viewFormats.length === 0); + t.expect(configuration.colorSpace === 'srgb'); + t.expect(configuration.toneMapping.mode === 'standard'); + t.expect(configuration.alphaMode === 'opaque'); + // getCurrentTexture will succeed with a valid device. ctx.getCurrentTexture(); @@ -96,12 +121,27 @@ g.test('device') ctx.getCurrentTexture(); }); + // getConfiguration returns null after unconfigure. + t.expect(ctx.getConfiguration() === null); + // Should be able to successfully configure again after unconfiguring. ctx.configure({ device: t.device, format: 'rgba8unorm', + alphaMode: 'premultiplied', }); ctx.getCurrentTexture(); + + // getConfiguration will succeed after configure. 
+ const newConfiguration = ctx.getConfiguration(); + assert(newConfiguration !== null); + t.expect(newConfiguration.device === t.device); + t.expect(newConfiguration.format === 'rgba8unorm'); + t.expect(newConfiguration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(newConfiguration.viewFormats.length === 0); + t.expect(newConfiguration.colorSpace === 'srgb'); + t.expect(newConfiguration.toneMapping.mode === 'standard'); + t.expect(newConfiguration.alphaMode === 'premultiplied'); }); g.test('format') @@ -133,18 +173,21 @@ g.test('format') } } - t.expectValidationError(() => { + if (validFormat) { ctx.configure({ device: t.device, format, }); - }, !validFormat); - - t.expectValidationError(() => { - // Should always return a texture, whether the configured format was valid or not. - const currentTexture = ctx.getCurrentTexture(); - t.expect(currentTexture instanceof GPUTexture); - }, !validFormat); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.format === format); + } else { + t.shouldThrow('TypeError', () => { + ctx.configure({ + device: t.device, + format, + }); + }); + } }); g.test('usage') @@ -179,6 +222,9 @@ g.test('usage') usage, }); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.usage === usage); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture instanceof GPUTexture); t.expect(currentTexture.usage === usage); @@ -289,6 +335,9 @@ g.test('alpha_mode') alphaMode, }); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.alphaMode === alphaMode); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture instanceof GPUTexture); }); @@ -412,6 +461,9 @@ g.test('viewFormats') }); }, !compatible); + const viewFormats = ctx.getConfiguration()!.viewFormats; + t.expect(viewFormats[0] === viewFormat); + // Likewise for getCurrentTexture(). let currentTexture: GPUTexture; t.expectValidationError(() => { diff --git a/src/webgpu/web_platform/reftests/gpu_ref_test.ts b/src/webgpu/web_platform/reftests/gpu_ref_test.ts index 48161ac33e87..051fb52f0037 100644 --- a/src/webgpu/web_platform/reftests/gpu_ref_test.ts +++ b/src/webgpu/web_platform/reftests/gpu_ref_test.ts @@ -1,5 +1,5 @@ import { assert } from '../../../common/util/util.js'; -import { takeScreenshotDelayed } from '../../../common/util/wpt_reftest_wait.js'; +import { takeScreenshot, takeScreenshotDelayed } from '../../../common/util/wpt_reftest_wait.js'; interface GPURefTest { readonly device: GPUDevice; @@ -22,5 +22,8 @@ export function runRefTest(fn: (t: GPURefTest) => Promise | void): void { await fn({ device, queue }); takeScreenshotDelayed(50); - })(); + })().catch(() => { + // remove reftest-wait to mark end of test + takeScreenshot(); + }); } diff --git a/standalone/index.html b/standalone/index.html index d087d6584cd9..5c21c1033744 100644 --- a/standalone/index.html +++ b/standalone/index.html @@ -13,8 +13,10 @@ - - + + + +