diff --git a/Gruntfile.js b/Gruntfile.js index 7f004ab468ec..f151bad399df 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -24,7 +24,10 @@ module.exports = function (grunt) { pkg: grunt.file.readJSON('package.json'), clean: { - out: ['gen/', 'out/', 'out-wpt/', 'out-node/'], + gen: ['gen/'], + out: ['out/'], + 'out-wpt': ['out-wpt/'], + 'out-node': ['out-node/'], }, run: { @@ -246,17 +249,20 @@ module.exports = function (grunt) { }); grunt.registerTask('generate-common', 'Generate files into gen/ and src/', [ + 'clean:gen', 'run:generate-version', 'run:generate-listings-and-webworkers', 'run:generate-cache', ]); grunt.registerTask('build-standalone', 'Build out/ (no checks; run after generate-common)', [ + 'clean:out', 'run:build-out', 'run:copy-assets', 'copy:gen-to-out', 'copy:htmlfiles-to-out', ]); grunt.registerTask('build-wpt', 'Build out-wpt/ (no checks; run after generate-common)', [ + 'clean:out-wpt', 'run:build-out-wpt', 'run:copy-assets-wpt', 'copy:gen-to-out-wpt', @@ -265,6 +271,7 @@ module.exports = function (grunt) { 'run:autoformat-out-wpt', ]); grunt.registerTask('build-node', 'Build out-node/ (no checks; run after generate-common)', [ + 'clean:out-node', 'run:build-out-node', 'run:copy-assets-node', ]); @@ -282,7 +289,6 @@ module.exports = function (grunt) { grunt.registerTask('pre', ['all']); registerTaskAndAddToHelp('all', 'Run all builds and checks', [ - 'clean', 'generate-common', 'concurrent:all-builds-and-checks', ]); diff --git a/package-lock.json b/package-lock.json index 4837e5c70485..cc4e97a2ce64 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.49", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", @@ -1539,9 +1539,9 @@ "dev": true }, "node_modules/@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.49", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.49.tgz", + "integrity": "sha512-NMmS8/DofhH/IFeW+876XrHVWel+J/vdcFCHLDqeJgkH9x0DeiwjVd8LcBdaxdG/T7Rf8VUAYsA8X1efMzLjRQ==", "dev": true }, "node_modules/abbrev": { @@ -10076,9 +10076,9 @@ "dev": true }, "@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.49", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.49.tgz", + "integrity": "sha512-NMmS8/DofhH/IFeW+876XrHVWel+J/vdcFCHLDqeJgkH9x0DeiwjVd8LcBdaxdG/T7Rf8VUAYsA8X1efMzLjRQ==", "dev": true }, "abbrev": { diff --git a/package.json b/package.json index 9d311579c314..3ef62315db25 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.49", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", diff --git a/src/common/framework/test_config.ts b/src/common/framework/test_config.ts index e6624ae12014..072aaf736027 100644 --- 
a/src/common/framework/test_config.ts +++ b/src/common/framework/test_config.ts @@ -4,8 +4,20 @@ export type TestConfig = { */ enableDebugLogs: boolean; + /** + * Maximum number of subcases in flight at once, within a case. Once this many + * are in flight, wait for a subcase to finish before starting the next one. + */ maxSubcasesInFlight: number; + + /** + * Every `subcasesBetweenAttemptingGC` subcases, run `attemptGarbageCollection()`. + * Setting to `Infinity` disables this. Setting to 1 attempts GC every time (slow!). + */ + subcasesBetweenAttemptingGC: number; + testHeartbeatCallback: () => void; + noRaceWithRejectOnTimeout: boolean; /** @@ -40,7 +52,8 @@ export type TestConfig = { export const globalTestConfig: TestConfig = { enableDebugLogs: false, - maxSubcasesInFlight: 500, + maxSubcasesInFlight: 100, + subcasesBetweenAttemptingGC: 5000, testHeartbeatCallback: () => {}, noRaceWithRejectOnTimeout: false, unrollConstEvalLoops: false, diff --git a/src/common/internal/logging/test_case_recorder.ts b/src/common/internal/logging/test_case_recorder.ts index 78f625269e3d..eb03f4ea96f7 100644 --- a/src/common/internal/logging/test_case_recorder.ts +++ b/src/common/internal/logging/test_case_recorder.ts @@ -44,7 +44,7 @@ export class TestCaseRecorder { private startTime = -1; private logs: LogMessageWithStack[] = []; private logLinesAtCurrentSeverity = 0; - private debugging = false; + public debugging = false; constructor(result: LiveTestCaseResult, debugging: boolean) { this.result = result; diff --git a/src/common/internal/test_group.ts b/src/common/internal/test_group.ts index e1d0cde12d5c..ac3b11082771 100644 --- a/src/common/internal/test_group.ts +++ b/src/common/internal/test_group.ts @@ -31,6 +31,7 @@ import { stringifyPublicParamsUniquely, } from '../internal/query/stringify_params.js'; import { validQueryPart } from '../internal/query/validQueryPart.js'; +import { attemptGarbageCollection } from '../util/collect_garbage.js'; import { DeepReadonly } from '../util/types.js'; import { assert, unreachable } from '../util/util.js'; @@ -620,7 +621,7 @@ class RunCaseSpecific implements RunCase { const subcasePrefix = 'subcase: ' + stringifyPublicParams(subParams); const subRec = new Proxy(rec, { get: (target, k: keyof TestCaseRecorder) => { - const prop = TestCaseRecorder.prototype[k]; + const prop = rec[k] ?? TestCaseRecorder.prototype[k]; if (typeof prop === 'function') { testHeartbeatCallback(); return function (...args: Parameters) { @@ -696,6 +697,7 @@ class RunCaseSpecific implements RunCase { subRec.threw(ex); } }) + .finally(attemptGarbageCollectionIfDue) .finally(subcaseFinishedCallback); allPreviousSubcasesFinalizedPromise = allPreviousSubcasesFinalizedPromise.then( @@ -711,13 +713,17 @@ class RunCaseSpecific implements RunCase { rec.skipped(new SkipTestCase('all subcases were skipped')); } } else { - await this.runTest( - rec, - sharedState, - this.params, - /* throwSkip */ false, - getExpectedStatus(selfQuery) - ); + try { + await this.runTest( + rec, + sharedState, + this.params, + /* throwSkip */ false, + getExpectedStatus(selfQuery) + ); + } finally { + await attemptGarbageCollectionIfDue(); + } } } finally { testHeartbeatCallback(); @@ -754,3 +760,17 @@ export type CaseTimingLogLine = { */ nonskippedSubcaseCount: number; }; + +/** Every `subcasesBetweenAttemptingGC` calls to this function will `attemptGarbageCollection()`. */ +const attemptGarbageCollectionIfDue: () => Promise = (() => { + // This state is global because garbage is global. 
+ let subcasesSinceLastGC = 0; + + return async function attemptGarbageCollectionIfDue() { + subcasesSinceLastGC++; + if (subcasesSinceLastGC >= globalTestConfig.subcasesBetweenAttemptingGC) { + subcasesSinceLastGC = 0; + return attemptGarbageCollection(); + } + }; +})(); diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts index 932c5668b587..0305031cc790 100644 --- a/src/common/runtime/standalone.ts +++ b/src/common/runtime/standalone.ts @@ -369,6 +369,9 @@ function makeSubtreeChildrenHTML( const runMySubtree = async () => { const results: SubtreeResult[] = []; for (const { runSubtree } of childFns) { + if (stopRequested) { + break; + } results.push(await runSubtree()); } return mergeSubtreeResults(...results); diff --git a/src/common/tools/dev_server.ts b/src/common/tools/dev_server.ts index 1d1313e4f51c..8d78855974d6 100644 --- a/src/common/tools/dev_server.ts +++ b/src/common/tools/dev_server.ts @@ -106,10 +106,6 @@ const app = express(); // Send Chrome Origin Trial tokens app.use((_req, res, next) => { - res.header('Origin-Trial', [ - // Token for http://localhost:8080 - 'AvyDIV+RJoYs8fn3W6kIrBhWw0te0klraoz04mw/nPb8VTus3w5HCdy+vXqsSzomIH745CT6B5j1naHgWqt/tw8AAABJeyJvcmlnaW4iOiJodHRwOi8vbG9jYWxob3N0OjgwODAiLCJmZWF0dXJlIjoiV2ViR1BVIiwiZXhwaXJ5IjoxNjYzNzE4Mzk5fQ==', - ]); next(); }); diff --git a/src/common/tools/gen_wpt_cts_html.ts b/src/common/tools/gen_wpt_cts_html.ts index 46c2ae435491..35eac195b33c 100644 --- a/src/common/tools/gen_wpt_cts_html.ts +++ b/src/common/tools/gen_wpt_cts_html.ts @@ -9,6 +9,8 @@ import { } from '../internal/query/query.js'; import { assert } from '../util/util.js'; +const kMaxQueryLength = 184; + function printUsageAndExit(rc: number): never { console.error(`\ Usage (simple, for webgpu:* suite only): @@ -193,6 +195,7 @@ let config: Config; const loader = new DefaultTestFileLoader(); const lines = []; + const tooLongQueries = []; for (const prefix of config.argumentsPrefixes) { const rootQuery = new TestQueryMultiFile(config.suite, []); const tree = await loader.loadTree(rootQuery, { @@ -219,15 +222,9 @@ let config: Config; // Check for a safe-ish path length limit. Filename must be <= 255, and on Windows the whole // path must be <= 259. Leave room for e.g.: // 'c:\b\s\w\xxxxxxxx\layout-test-results\external\wpt\webgpu\cts_worker=0_q=...-actual.txt' - assert( - queryString.length < 185, - `Generated test variant would produce too-long -actual.txt filename. Possible solutions: -- Reduce the length of the parts of the test query -- Reduce the parameterization of the test -- Make the test function faster and regenerate the listing_meta entry -- Reduce the specificity of test expectations (if you're using them) -${queryString}` - ); + if (queryString.length > kMaxQueryLength) { + tooLongQueries.push(queryString); + } } lines.push({ @@ -243,6 +240,29 @@ ${queryString}` } prefixComment.comment += `; ${variantCount} variants generated from ${testsSeen.size} tests in ${filesSeen.size} files`; } + + if (tooLongQueries.length > 0) { + // Try to show some representation of failures. We show one entry from each + // test that is different length. Without this the logger cuts off the error + // messages and you end up not being told about which tests have issues. 
+ const queryStrings = new Map(); + tooLongQueries.forEach(s => { + const colonNdx = s.lastIndexOf(':'); + const prefix = s.substring(0, colonNdx + 1); + const id = `${prefix}:${s.length}`; + queryStrings.set(id, s); + }); + throw new Error( + `Generated test variant would produce too-long -actual.txt filename. Possible solutions: + - Reduce the length of the parts of the test query + - Reduce the parameterization of the test + - Make the test function faster and regenerate the listing_meta entry + - Reduce the specificity of test expectations (if you're using them) +|<${''.padEnd(kMaxQueryLength - 4, '-')}>| +${[...queryStrings.values()].join('\n')}` + ); + } + await generateFile(lines); })().catch(ex => { console.log(ex.stack ?? ex.toString()); diff --git a/src/common/util/navigator_gpu.ts b/src/common/util/navigator_gpu.ts index 6f3a423db39f..4e58797097ed 100644 --- a/src/common/util/navigator_gpu.ts +++ b/src/common/util/navigator_gpu.ts @@ -68,12 +68,11 @@ export function getGPU(recorder: TestCaseRecorder | null): GPU { ): Promise { const promise = oldFn.call(this, { ...defaultRequestAdapterOptions, ...options }); if (recorder) { - void promise.then(async adapter => { + void promise.then(adapter => { if (adapter) { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. - const info = adapter.info || (await adapter.requestAdapterInfo()); - const infoString = `Adapter: ${info.vendor} / ${info.architecture} / ${info.device}`; - recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo: info }))); + const adapterInfo = adapter.info; + const infoString = `Adapter: ${adapterInfo.vendor} / ${adapterInfo.architecture} / ${adapterInfo.device}`; + recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo }))); } }); } diff --git a/src/resources/cache/hashes.json b/src/resources/cache/hashes.json index e0459422560f..e2224325944f 100644 --- a/src/resources/cache/hashes.json +++ b/src/resources/cache/hashes.json @@ -1,112 +1,112 @@ { - "webgpu/shader/execution/binary/af_addition.bin": "338b5b67", - "webgpu/shader/execution/binary/af_logical.bin": "3b2aceb8", - "webgpu/shader/execution/binary/af_division.bin": "a77dc4c0", - "webgpu/shader/execution/binary/af_matrix_addition.bin": "136a7fbb", - "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "90f2c731", - "webgpu/shader/execution/binary/af_multiplication.bin": "35ba40b9", - "webgpu/shader/execution/binary/af_remainder.bin": "41582f85", - "webgpu/shader/execution/binary/af_subtraction.bin": "a41420b2", - "webgpu/shader/execution/binary/f16_addition.bin": "ef10ca66", - "webgpu/shader/execution/binary/f16_logical.bin": "4bf24ca5", - "webgpu/shader/execution/binary/f16_division.bin": "f826b6ba", - "webgpu/shader/execution/binary/f16_matrix_addition.bin": "a910ddb0", - "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "9458671c", - "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "36be05d3", - "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8aa6a88a", - "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "38282a11", - "webgpu/shader/execution/binary/f16_multiplication.bin": "62f91819", - "webgpu/shader/execution/binary/f16_remainder.bin": "f829bb65", - "webgpu/shader/execution/binary/f16_subtraction.bin": "82d4e231", - "webgpu/shader/execution/binary/f32_addition.bin": "9b0a0c50", - "webgpu/shader/execution/binary/f32_logical.bin": "b75af25a", - "webgpu/shader/execution/binary/f32_division.bin": "f6d7832f", - 
"webgpu/shader/execution/binary/f32_matrix_addition.bin": "3317c75b", - "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "c6f990c8", - "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "b091a702", - "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "2d12a16b", - "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "e1217524", - "webgpu/shader/execution/binary/f32_multiplication.bin": "19774fb3", - "webgpu/shader/execution/binary/f32_remainder.bin": "fd94bb9a", - "webgpu/shader/execution/binary/f32_subtraction.bin": "dba7cd7a", - "webgpu/shader/execution/binary/i32_arithmetic.bin": "e3b317e1", - "webgpu/shader/execution/binary/i32_comparison.bin": "63fa9be8", - "webgpu/shader/execution/binary/u32_arithmetic.bin": "e8b4008c", - "webgpu/shader/execution/binary/u32_comparison.bin": "d472fd61", - "webgpu/shader/execution/abs.bin": "631d932d", - "webgpu/shader/execution/acos.bin": "afcafcb1", - "webgpu/shader/execution/acosh.bin": "4b30eb95", - "webgpu/shader/execution/asin.bin": "c850c13d", - "webgpu/shader/execution/asinh.bin": "66a6acc0", - "webgpu/shader/execution/atan.bin": "2aabbb53", - "webgpu/shader/execution/atan2.bin": "82dd926a", - "webgpu/shader/execution/atanh.bin": "b98c937c", - "webgpu/shader/execution/bitcast.bin": "5daaee1b", - "webgpu/shader/execution/ceil.bin": "d0c32cf4", - "webgpu/shader/execution/clamp.bin": "4d1fc26a", - "webgpu/shader/execution/cos.bin": "dc837ae2", - "webgpu/shader/execution/cosh.bin": "d9e90580", - "webgpu/shader/execution/cross.bin": "ce7979f", - "webgpu/shader/execution/degrees.bin": "1436a196", - "webgpu/shader/execution/determinant.bin": "f36f1fa1", - "webgpu/shader/execution/distance.bin": "5103f8bd", - "webgpu/shader/execution/dot.bin": "4514172c", - "webgpu/shader/execution/exp.bin": "f41150bd", - "webgpu/shader/execution/exp2.bin": "19c494e", - "webgpu/shader/execution/faceForward.bin": "27b6e4a7", - "webgpu/shader/execution/floor.bin": "5bb5098b", - "webgpu/shader/execution/fma.bin": "daace9a4", - "webgpu/shader/execution/fract.bin": "be5f0334", - "webgpu/shader/execution/frexp.bin": "c9efaf7c", - "webgpu/shader/execution/inverseSqrt.bin": "8a50b907", - "webgpu/shader/execution/ldexp.bin": "cb4cea21", - "webgpu/shader/execution/length.bin": "a1b9fbeb", - "webgpu/shader/execution/log.bin": "9f2eb7c3", - "webgpu/shader/execution/log2.bin": "9ee7d861", - "webgpu/shader/execution/max.bin": "11e4608e", - "webgpu/shader/execution/min.bin": "7a084c44", - "webgpu/shader/execution/mix.bin": "7b892a4f", - "webgpu/shader/execution/modf.bin": "b3bf26d7", - "webgpu/shader/execution/normalize.bin": "18eba01d", - "webgpu/shader/execution/pack2x16float.bin": "82df446e", - "webgpu/shader/execution/pow.bin": "d3a05344", - "webgpu/shader/execution/quantizeToF16.bin": "7793770e", - "webgpu/shader/execution/radians.bin": "582c1f6b", - "webgpu/shader/execution/reflect.bin": "9161d6e5", - "webgpu/shader/execution/refract.bin": "817b59aa", - "webgpu/shader/execution/round.bin": "cb881aa2", - "webgpu/shader/execution/saturate.bin": "3716605e", - "webgpu/shader/execution/sign.bin": "549ac92f", - "webgpu/shader/execution/sin.bin": "5ec5bcb7", - "webgpu/shader/execution/sinh.bin": "62f6b736", - "webgpu/shader/execution/smoothstep.bin": "aa97768", - "webgpu/shader/execution/sqrt.bin": "d0a134ce", - "webgpu/shader/execution/step.bin": "b8035bb9", - "webgpu/shader/execution/tan.bin": "b34366cd", - "webgpu/shader/execution/tanh.bin": "8f5edddc", - "webgpu/shader/execution/transpose.bin": "1aa2de65", - 
"webgpu/shader/execution/trunc.bin": "cf43e3f7", - "webgpu/shader/execution/unpack2x16float.bin": "57ea7c02", - "webgpu/shader/execution/unpack2x16snorm.bin": "17fd3f86", - "webgpu/shader/execution/unpack2x16unorm.bin": "fc68bc4b", - "webgpu/shader/execution/unpack4x8snorm.bin": "fef504c1", - "webgpu/shader/execution/unpack4x8unorm.bin": "e8d8de93", - "webgpu/shader/execution/unary/af_arithmetic.bin": "14c0612a", - "webgpu/shader/execution/unary/af_assignment.bin": "3ad4afc", - "webgpu/shader/execution/unary/bool_conversion.bin": "15f7f3fb", - "webgpu/shader/execution/unary/f16_arithmetic.bin": "4a20db6d", - "webgpu/shader/execution/unary/f16_conversion.bin": "31f72f5a", - "webgpu/shader/execution/unary/f32_arithmetic.bin": "f1c311cb", - "webgpu/shader/execution/unary/f32_conversion.bin": "7539cdb3", - "webgpu/shader/execution/unary/i32_arithmetic.bin": "de945eec", - "webgpu/shader/execution/unary/i32_conversion.bin": "1728a03e", - "webgpu/shader/execution/unary/u32_conversion.bin": "9e6ca0ce", - "webgpu/shader/execution/unary/ai_assignment.bin": "1fd685a2", - "webgpu/shader/execution/binary/ai_arithmetic.bin": "90e651f4", - "webgpu/shader/execution/unary/ai_arithmetic.bin": "ba31d178", - "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "bc8b52ef", - "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "54edf6a2", - "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "43b036b1", - "webgpu/shader/execution/derivatives.bin": "65c15fc3", - "webgpu/shader/execution/fwidth.bin": "cc91c875" + "webgpu/shader/execution/binary/af_addition.bin": "d0c1b760", + "webgpu/shader/execution/binary/af_logical.bin": "ca60ce77", + "webgpu/shader/execution/binary/af_division.bin": "47ae1ca1", + "webgpu/shader/execution/binary/af_matrix_addition.bin": "afaf9bae", + "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "42433bf3", + "webgpu/shader/execution/binary/af_multiplication.bin": "babfc501", + "webgpu/shader/execution/binary/af_remainder.bin": "19995293", + "webgpu/shader/execution/binary/af_subtraction.bin": "62f090b9", + "webgpu/shader/execution/binary/f16_addition.bin": "540ae334", + "webgpu/shader/execution/binary/f16_logical.bin": "c1f09c30", + "webgpu/shader/execution/binary/f16_division.bin": "b4eabc05", + "webgpu/shader/execution/binary/f16_matrix_addition.bin": "6b9113b", + "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "a7362ff1", + "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "4ac4e5bb", + "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "93d4d43a", + "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "beed89d5", + "webgpu/shader/execution/binary/f16_multiplication.bin": "6b5f0d51", + "webgpu/shader/execution/binary/f16_remainder.bin": "a1f499b3", + "webgpu/shader/execution/binary/f16_subtraction.bin": "61a571d5", + "webgpu/shader/execution/binary/f32_addition.bin": "fa6cc596", + "webgpu/shader/execution/binary/f32_logical.bin": "2b155b60", + "webgpu/shader/execution/binary/f32_division.bin": "243c9ce6", + "webgpu/shader/execution/binary/f32_matrix_addition.bin": "d3bc6ed6", + "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "2a4c1527", + "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "d695442", + "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "b306b19", + "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "aac6cbfd", + 
"webgpu/shader/execution/binary/f32_multiplication.bin": "a21303f5", + "webgpu/shader/execution/binary/f32_remainder.bin": "79e462a1", + "webgpu/shader/execution/binary/f32_subtraction.bin": "4e6bbf38", + "webgpu/shader/execution/binary/i32_arithmetic.bin": "167760cc", + "webgpu/shader/execution/binary/i32_comparison.bin": "6a9f856a", + "webgpu/shader/execution/binary/u32_arithmetic.bin": "ac424b44", + "webgpu/shader/execution/binary/u32_comparison.bin": "a9e71302", + "webgpu/shader/execution/abs.bin": "a42729c4", + "webgpu/shader/execution/acos.bin": "664a5662", + "webgpu/shader/execution/acosh.bin": "d3fb8eb0", + "webgpu/shader/execution/asin.bin": "5a4f5b9e", + "webgpu/shader/execution/asinh.bin": "3ce3fe4d", + "webgpu/shader/execution/atan.bin": "759d432", + "webgpu/shader/execution/atan2.bin": "95008607", + "webgpu/shader/execution/atanh.bin": "569bd1b6", + "webgpu/shader/execution/bitcast.bin": "4329e501", + "webgpu/shader/execution/ceil.bin": "55cc76e5", + "webgpu/shader/execution/clamp.bin": "d580a273", + "webgpu/shader/execution/cos.bin": "3107bc4b", + "webgpu/shader/execution/cosh.bin": "d36c86cc", + "webgpu/shader/execution/cross.bin": "e48c39ba", + "webgpu/shader/execution/degrees.bin": "f74b63d2", + "webgpu/shader/execution/determinant.bin": "f07e1160", + "webgpu/shader/execution/distance.bin": "93156a89", + "webgpu/shader/execution/dot.bin": "4e2fe407", + "webgpu/shader/execution/exp.bin": "3b269b18", + "webgpu/shader/execution/exp2.bin": "7aeeeaf6", + "webgpu/shader/execution/faceForward.bin": "451ffbd8", + "webgpu/shader/execution/floor.bin": "37131d74", + "webgpu/shader/execution/fma.bin": "30111350", + "webgpu/shader/execution/fract.bin": "5ef13392", + "webgpu/shader/execution/frexp.bin": "da764bc0", + "webgpu/shader/execution/inverseSqrt.bin": "6ff34703", + "webgpu/shader/execution/ldexp.bin": "5016cec9", + "webgpu/shader/execution/length.bin": "f236d2e7", + "webgpu/shader/execution/log.bin": "1c54f128", + "webgpu/shader/execution/log2.bin": "e44e2370", + "webgpu/shader/execution/max.bin": "eb4c1901", + "webgpu/shader/execution/min.bin": "f8c70a2b", + "webgpu/shader/execution/mix.bin": "df3b3f62", + "webgpu/shader/execution/modf.bin": "b600b26f", + "webgpu/shader/execution/normalize.bin": "7af3a3d2", + "webgpu/shader/execution/pack2x16float.bin": "7c67b10e", + "webgpu/shader/execution/pow.bin": "ee37f4ba", + "webgpu/shader/execution/quantizeToF16.bin": "a7a65754", + "webgpu/shader/execution/radians.bin": "51d423b9", + "webgpu/shader/execution/reflect.bin": "3ba4eda6", + "webgpu/shader/execution/refract.bin": "13fc4914", + "webgpu/shader/execution/round.bin": "9155b88b", + "webgpu/shader/execution/saturate.bin": "73cecf71", + "webgpu/shader/execution/sign.bin": "68d61a83", + "webgpu/shader/execution/sin.bin": "44219876", + "webgpu/shader/execution/sinh.bin": "158d261d", + "webgpu/shader/execution/smoothstep.bin": "7129c56b", + "webgpu/shader/execution/sqrt.bin": "9aaaf8aa", + "webgpu/shader/execution/step.bin": "85858027", + "webgpu/shader/execution/tan.bin": "dbbda634", + "webgpu/shader/execution/tanh.bin": "8c540d5c", + "webgpu/shader/execution/transpose.bin": "a676fc9a", + "webgpu/shader/execution/trunc.bin": "35ab398d", + "webgpu/shader/execution/unpack2x16float.bin": "eb9294c9", + "webgpu/shader/execution/unpack2x16snorm.bin": "7208eb73", + "webgpu/shader/execution/unpack2x16unorm.bin": "20d9669b", + "webgpu/shader/execution/unpack4x8snorm.bin": "c77e1a72", + "webgpu/shader/execution/unpack4x8unorm.bin": "d80caf66", + 
"webgpu/shader/execution/unary/af_arithmetic.bin": "963c3185", + "webgpu/shader/execution/unary/af_assignment.bin": "9e8a3b3f", + "webgpu/shader/execution/unary/bool_conversion.bin": "eee7a40c", + "webgpu/shader/execution/unary/f16_arithmetic.bin": "aaea9f75", + "webgpu/shader/execution/unary/f16_conversion.bin": "5b26998a", + "webgpu/shader/execution/unary/f32_arithmetic.bin": "65dfc2ac", + "webgpu/shader/execution/unary/f32_conversion.bin": "cd874be3", + "webgpu/shader/execution/unary/i32_arithmetic.bin": "af4c0e43", + "webgpu/shader/execution/unary/i32_conversion.bin": "5b6e4d9", + "webgpu/shader/execution/unary/u32_conversion.bin": "229649a6", + "webgpu/shader/execution/unary/ai_assignment.bin": "8efcf261", + "webgpu/shader/execution/binary/ai_arithmetic.bin": "a57ee284", + "webgpu/shader/execution/unary/ai_arithmetic.bin": "948016b6", + "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "52c24212", + "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "256556e1", + "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "38085521", + "webgpu/shader/execution/derivatives.bin": "f38a38ff", + "webgpu/shader/execution/fwidth.bin": "4e9fc55d" } \ No newline at end of file diff --git a/src/resources/cache/webgpu/shader/execution/abs.bin b/src/resources/cache/webgpu/shader/execution/abs.bin index 4cba9b72dff4..373fef8f3b32 100644 Binary files a/src/resources/cache/webgpu/shader/execution/abs.bin and b/src/resources/cache/webgpu/shader/execution/abs.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acos.bin b/src/resources/cache/webgpu/shader/execution/acos.bin index 2ecaaa389a4e..5e311531fef4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acos.bin and b/src/resources/cache/webgpu/shader/execution/acos.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acosh.bin b/src/resources/cache/webgpu/shader/execution/acosh.bin index d48659f3c325..82a3857ebdc6 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acosh.bin and b/src/resources/cache/webgpu/shader/execution/acosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asin.bin b/src/resources/cache/webgpu/shader/execution/asin.bin index b199953eaf4b..388de445c06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asin.bin and b/src/resources/cache/webgpu/shader/execution/asin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asinh.bin b/src/resources/cache/webgpu/shader/execution/asinh.bin index b370c53b0179..120654f685c2 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asinh.bin and b/src/resources/cache/webgpu/shader/execution/asinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atan.bin b/src/resources/cache/webgpu/shader/execution/atan.bin index 6ab0ba106a9e..e81af87e15e0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atan.bin and b/src/resources/cache/webgpu/shader/execution/atan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atanh.bin b/src/resources/cache/webgpu/shader/execution/atanh.bin index e6a190b35df5..a7fee794094d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atanh.bin and b/src/resources/cache/webgpu/shader/execution/atanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/bitcast.bin b/src/resources/cache/webgpu/shader/execution/bitcast.bin index ead299d5e78f..e743a092553e 100644 Binary files a/src/resources/cache/webgpu/shader/execution/bitcast.bin 
and b/src/resources/cache/webgpu/shader/execution/bitcast.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/ceil.bin b/src/resources/cache/webgpu/shader/execution/ceil.bin index 9b93ed416f64..02cf23324cdf 100644 Binary files a/src/resources/cache/webgpu/shader/execution/ceil.bin and b/src/resources/cache/webgpu/shader/execution/ceil.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/cos.bin b/src/resources/cache/webgpu/shader/execution/cos.bin index 4e34eff3f1b1..a5d8573c6257 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cos.bin and b/src/resources/cache/webgpu/shader/execution/cos.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/cosh.bin b/src/resources/cache/webgpu/shader/execution/cosh.bin index 5b30d2786c5e..25e8750cc7fc 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cosh.bin and b/src/resources/cache/webgpu/shader/execution/cosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/degrees.bin b/src/resources/cache/webgpu/shader/execution/degrees.bin index 662558d78aca..eb514cb48b4d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/degrees.bin and b/src/resources/cache/webgpu/shader/execution/degrees.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/distance.bin b/src/resources/cache/webgpu/shader/execution/distance.bin index 23a4756a69eb..06d0d9a8fc2a 100644 Binary files a/src/resources/cache/webgpu/shader/execution/distance.bin and b/src/resources/cache/webgpu/shader/execution/distance.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/floor.bin b/src/resources/cache/webgpu/shader/execution/floor.bin index b5341907f8ef..a199d0db9ff0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/floor.bin and b/src/resources/cache/webgpu/shader/execution/floor.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/fract.bin b/src/resources/cache/webgpu/shader/execution/fract.bin index 7f09e8f60b23..bb80e873625f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/fract.bin and b/src/resources/cache/webgpu/shader/execution/fract.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/frexp.bin b/src/resources/cache/webgpu/shader/execution/frexp.bin index 6811dfa29507..8f87d16a9c95 100644 Binary files a/src/resources/cache/webgpu/shader/execution/frexp.bin and b/src/resources/cache/webgpu/shader/execution/frexp.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/length.bin b/src/resources/cache/webgpu/shader/execution/length.bin index 3644d9b683ac..db42153edff7 100644 Binary files a/src/resources/cache/webgpu/shader/execution/length.bin and b/src/resources/cache/webgpu/shader/execution/length.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log.bin b/src/resources/cache/webgpu/shader/execution/log.bin index ba591faad8a0..a5e62f8e02d8 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log.bin and b/src/resources/cache/webgpu/shader/execution/log.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log2.bin b/src/resources/cache/webgpu/shader/execution/log2.bin index 00641ce119cf..f19d77f41097 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log2.bin and b/src/resources/cache/webgpu/shader/execution/log2.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/modf.bin b/src/resources/cache/webgpu/shader/execution/modf.bin index 363cc161fd72..74259a23a6f8 100644 Binary files 
a/src/resources/cache/webgpu/shader/execution/modf.bin and b/src/resources/cache/webgpu/shader/execution/modf.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin index e95227d36e50..a7b99a0a6cfd 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin and b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pow.bin b/src/resources/cache/webgpu/shader/execution/pow.bin index 4f5faf3293fa..f66ec5ca2fbe 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pow.bin and b/src/resources/cache/webgpu/shader/execution/pow.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin index 9e4308d5cd30..d6d75befc06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin and b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/radians.bin b/src/resources/cache/webgpu/shader/execution/radians.bin index f5285d108778..731e6be24ed5 100644 Binary files a/src/resources/cache/webgpu/shader/execution/radians.bin and b/src/resources/cache/webgpu/shader/execution/radians.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/round.bin b/src/resources/cache/webgpu/shader/execution/round.bin index c3b30b68f0a1..5ccab9e661c4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/round.bin and b/src/resources/cache/webgpu/shader/execution/round.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/saturate.bin b/src/resources/cache/webgpu/shader/execution/saturate.bin index 2e1eb821a9e7..e7402f25af73 100644 Binary files a/src/resources/cache/webgpu/shader/execution/saturate.bin and b/src/resources/cache/webgpu/shader/execution/saturate.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sign.bin b/src/resources/cache/webgpu/shader/execution/sign.bin index 033f2e8158f6..576019c008ee 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sign.bin and b/src/resources/cache/webgpu/shader/execution/sign.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sin.bin b/src/resources/cache/webgpu/shader/execution/sin.bin index a2ca632008ff..bdbbfe2bd539 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sin.bin and b/src/resources/cache/webgpu/shader/execution/sin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sinh.bin b/src/resources/cache/webgpu/shader/execution/sinh.bin index 1176cd472bf2..b4b051a226ae 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sinh.bin and b/src/resources/cache/webgpu/shader/execution/sinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sqrt.bin b/src/resources/cache/webgpu/shader/execution/sqrt.bin index 6dd8088c0898..64a7db70d004 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sqrt.bin and b/src/resources/cache/webgpu/shader/execution/sqrt.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tan.bin b/src/resources/cache/webgpu/shader/execution/tan.bin index 572bee4df2a5..5af3e740d213 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tan.bin and b/src/resources/cache/webgpu/shader/execution/tan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tanh.bin 
b/src/resources/cache/webgpu/shader/execution/tanh.bin index a13028b165f0..9687ff00235c 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tanh.bin and b/src/resources/cache/webgpu/shader/execution/tanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/trunc.bin b/src/resources/cache/webgpu/shader/execution/trunc.bin index ba81e2ada427..e18bb52ed981 100644 Binary files a/src/resources/cache/webgpu/shader/execution/trunc.bin and b/src/resources/cache/webgpu/shader/execution/trunc.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin index 98a90ea45b9a..f28c275092f1 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin index 14299da76670..c47b3d0afcb0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin index ebc60029fa60..6e93bec14f76 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin index 66b2bc73f889..55e1f5ed945f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin index 04841df60785..49969e9221ad 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin index 277ffc4d76b7..8dfc4e268561 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin differ diff --git a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts index 3382dabc3720..c0613087a96a 100644 --- a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts +++ b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts @@ -3,7 +3,6 @@ ShaderModule CompilationInfo tests. `; import { makeTestGroup } from '../../../../common/framework/test_group.js'; -import { keysOf } from '../../../../common/util/data_tables.js'; import { assert } from '../../../../common/util/util.js'; import { GPUTest } from '../../../gpu_test.js'; @@ -79,66 +78,24 @@ const kInvalidShaderSources = [ const kAllShaderSources = [...kValidShaderSources, ...kInvalidShaderSources]; -// This is the source the sourcemap refers to. 
-const kOriginalSource = new Array(20) - .fill(0) - .map((_, i) => `original line ${i}`) - .join('\n'); - -const kSourceMaps: { [name: string]: undefined | object } = { - none: undefined, - empty: {}, - // A valid source map. It maps `unknown` on lines 4 and line 5 to - // `wasUnknown` from lines 20, 21 respectively - valid: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain', 'wasUnknown'], - mappings: ';kBAYkCA,OACd;SAElB;gBAKOC;gBACAA', - }, - // not a valid sourcemap - invalid: { - version: -123, - notAnything: {}, - }, - // The correct format but this data is for lines 11,12 even - // though the source only has 5 or 6 lines - nonMatching: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain'], - mappings: ';;;;;;;;;;kBAYkCA,OACd;SAElB', - }, -}; -const kSourceMapsKeys = keysOf(kSourceMaps); - g.test('getCompilationInfo_returns') .desc( ` Test that getCompilationInfo() can be called on any ShaderModule. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for both valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters. - Test that the compilation info for valid shader modules contains no errors. - Test that the compilation info for invalid shader modules contains at least one error.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); @@ -171,25 +128,15 @@ g.test('line_number_and_position') Test that line numbers reported by compilationInfo either point at an appropriate line and position or at 0:0, indicating an unknown position. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for invalid shader modules containing containing at least one error. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u - .combineWithParams(kInvalidShaderSources) - .beginSubcases() - .combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kInvalidShaderSources)) .fn(async t => { - const { _code, _errorLine, _errorLinePos, sourceMapName } = t.params; + const { _code, _errorLine, _errorLinePos } = t.params; const shaderModule = t.expectGPUError('validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }); const info = await shaderModule.getCompilationInfo(); @@ -232,24 +179,17 @@ g.test('offset_and_length') .desc( `Test that message offsets and lengths are valid and align with any reported lineNum and linePos. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. 
- - Test for valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); diff --git a/src/webgpu/api/operation/texture_view/write.spec.ts b/src/webgpu/api/operation/texture_view/write.spec.ts index 43b27f2874a5..aa41e7e176ea 100644 --- a/src/webgpu/api/operation/texture_view/write.spec.ts +++ b/src/webgpu/api/operation/texture_view/write.spec.ts @@ -36,6 +36,9 @@ const kTextureViewWriteMethods = [ ] as const; type TextureViewWriteMethod = (typeof kTextureViewWriteMethods)[number]; +const kTextureViewUsageMethods = ['inherit', 'minimal'] as const; +type TextureViewUsageMethod = (typeof kTextureViewUsageMethods)[number]; + // Src color values to read from a shader array. const kColorsFloat = [ { R: 1.0, G: 0.0, B: 0.0, A: 0.8 }, @@ -271,6 +274,22 @@ function writeTextureAndGetExpectedTexelView( return expectedTexelView; } +function getTextureViewUsage( + viewUsageMethod: TextureViewUsageMethod, + minimalUsageForTest: GPUTextureUsageFlags +) { + switch (viewUsageMethod) { + case 'inherit': + return 0; + + case 'minimal': + return minimalUsageForTest; + + default: + unreachable(); + } +} + g.test('format') .desc( `Views of every allowed format. @@ -280,6 +299,7 @@ Read values from color array in the shader, and write it to the texture view via - x= every texture format - x= sampleCount {1, 4} if valid - x= every possible view write method (see above) +- x= inherited or minimal texture view usage TODO: Test sampleCount > 1 for 'render-pass-store' after extending copySinglePixelTextureToBufferUsingComputePass to read multiple pixels from multisampled textures. [1] @@ -318,6 +338,7 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone } return true; }) + .combine('viewUsageMethod', kTextureViewUsageMethods) ) .beforeAllSubcases(t => { const { format, method } = t.params; @@ -332,13 +353,12 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone } }) .fn(t => { - const { format, method, sampleCount } = t.params; + const { format, method, sampleCount, viewUsageMethod } = t.params; - const usage = - GPUTextureUsage.COPY_SRC | - (method.includes('storage') - ? GPUTextureUsage.STORAGE_BINDING - : GPUTextureUsage.RENDER_ATTACHMENT); + const textureUsageForMethod = method.includes('storage') + ? 
GPUTextureUsage.STORAGE_BINDING + : GPUTextureUsage.RENDER_ATTACHMENT; + const usage = GPUTextureUsage.COPY_SRC | textureUsageForMethod; const texture = t.createTextureTracked({ format, @@ -347,7 +367,9 @@ TODO: Test rgb10a2uint when TexelRepresentation.numericRange is made per-compone sampleCount, }); - const view = texture.createView(); + const view = texture.createView({ + usage: getTextureViewUsage(viewUsageMethod, textureUsageForMethod), + }); const expectedTexelView = writeTextureAndGetExpectedTexelView( t, method, diff --git a/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts new file mode 100644 index 000000000000..02ffd9e99db6 --- /dev/null +++ b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts @@ -0,0 +1,161 @@ +import { range } from '../../../../../common/util/util.js'; +import { align } from '../../../../util/math.js'; +import { kMaximumLimitBaseParams, makeLimitTestGroup } from '../limits/limit_utils.js'; + +function getPipelineDescriptorWithClipDistances( + device: GPUDevice, + interStageShaderVariables: number, + pointList: boolean, + clipDistances: number, + startLocation: number = 0 +): GPURenderPipelineDescriptor { + const vertexOutputVariables = + interStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxVertexOutputVariables = + device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + + const varyings = ` + ${range( + vertexOutputVariables, + i => `@location(${i + startLocation}) v4_${i + startLocation}: vec4f,` + ).join('\n')} + `; + + const code = ` + // test value : ${interStageShaderVariables} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${ + pointList ? ' + point-list' : '' + }${ + clipDistances > 0 + ? ` + ${align(clipDistances, 4) / 4} (clip_distances[${clipDistances}])` + : '' + } + // maxInterStageVariables: : ${maxVertexOutputVariables} + // num used inter stage variables : ${vertexOutputVariables} + // vertex output start location : ${startLocation} + + enable clip_distances; + + struct VSOut { + @builtin(position) p: vec4f, + ${varyings} + ${ + clipDistances > 0 + ? `@builtin(clip_distances) clipDistances: array,` + : '' + } + } + struct FSIn { + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + } + @vertex fn vs() -> VSOut { + var o: VSOut; + o.p = vec4f(0); + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); + return o; + } + `; + const module = device.createShaderModule({ code }); + const pipelineDescriptor: GPURenderPipelineDescriptor = { + layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }; + return pipelineDescriptor; +} + +const limit = 'maxInterStageShaderVariables'; +export const { g, description } = makeLimitTestGroup(limit); + +g.test('createRenderPipeline,at_over') + .desc(`Test using at and over ${limit} limit with clip_distances in createRenderPipeline(Async)`) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { limitTest, testValueName, async, pointList, clipDistances } = t.params; + await t.testDeviceWithRequestedMaximumLimits( + limitTest, + testValueName, + async ({ device, testValue, shouldError }) => { + const pipelineDescriptor = getPipelineDescriptorWithClipDistances( + device, + testValue, + pointList, + clipDistances + ); + + await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); + }, + undefined, + ['clip-distances'] + ); + }); + +g.test('createRenderPipeline,max_vertex_output_location') + .desc(`Test using clip_distances will limit the maximum value of vertex output location`) + .params(u => + u + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + .combine('startLocation', [0, 1, 2]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { pointList, clipDistances, startLocation } = t.params; + + const maxInterStageShaderVariables = t.adapter.limits.maxInterStageShaderVariables; + const deviceInTest = await t.requestDeviceTracked(t.adapter, { + requiredFeatures: ['clip-distances'], + requiredLimits: { + maxInterStageShaderVariables: t.adapter.limits.maxInterStageShaderVariables, + }, + }); + const pipelineDescriptor = getPipelineDescriptorWithClipDistances( + deviceInTest, + maxInterStageShaderVariables, + pointList, + clipDistances, + startLocation + ); + const vertexOutputVariables = + maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxLocationInTest = startLocation + vertexOutputVariables - 1; + const maxAllowedLocation = maxInterStageShaderVariables - 1 - align(clipDistances, 4) / 4; + const shouldError = maxLocationInTest > maxAllowedLocation; + + deviceInTest.pushErrorScope('validation'); + deviceInTest.createRenderPipeline(pipelineDescriptor); + const error = await deviceInTest.popErrorScope(); + t.expect(!!error === shouldError, `${error?.message || 'no error when one was expected'}`); + + deviceInTest.destroy(); + }); diff --git a/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts b/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts index 6941dac6b265..3def716d59de 100644 --- a/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts +++ b/src/webgpu/api/validation/capability_checks/features/texture_formats.spec.ts @@ -5,6 +5,7 @@ Tests for capability checking for features enabling optional texture formats. 
import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { getGPU } from '../../../../../common/util/navigator_gpu.js'; import { assert } from '../../../../../common/util/util.js'; +import { kCanvasTextureFormats } from '../../../../capability_info.js'; import { kAllTextureFormats, kTextureFormatInfo } from '../../../../format_info.js'; import { kAllCanvasTypes, createCanvas } from '../../../../util/create_elements.js'; import { ValidationTest } from '../../validation_test.js'; @@ -161,15 +162,15 @@ g.test('canvas_configuration') usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.COPY_DST, }; - if (enable_required_feature) { - t.expectValidationError(() => { - ctx.configure(canvasConf); - }); - } else { - t.shouldThrow('TypeError', () => { - ctx.configure(canvasConf); - }); - } + const expectedError = + enable_required_feature && + (kCanvasTextureFormats as unknown as Array).includes(format) + ? false + : 'TypeError'; + + t.shouldThrow(expectedError, () => { + ctx.configure(canvasConf); + }); }); g.test('canvas_configuration_view_formats') diff --git a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts index ea44b11c9148..14f1642cea9f 100644 --- a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts +++ b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts @@ -535,11 +535,16 @@ export class LimitTestsImpl extends GPUTestBase { limitTest: MaximumLimitValueTest, testValueName: MaximumTestValue, fn: (inputs: MaximumLimitTestInputs) => void | Promise, - extraLimits?: LimitsRequest + extraLimits?: LimitsRequest, + extraFeatures: GPUFeatureName[] = [] ) { assert(!this._device); - const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit(limitTest, extraLimits); + const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit( + limitTest, + extraLimits, + extraFeatures + ); // If we request over the limit requestDevice will throw if (!deviceAndLimits) { return; diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts deleted file mode 100644 index 1963d9f28c6c..000000000000 --- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { range } from '../../../../../common/util/util.js'; - -import { kMaximumLimitBaseParams, LimitsRequest, makeLimitTestGroup } from './limit_utils.js'; - -function getPipelineDescriptor( - device: GPUDevice, - testValue: number, - pointList: boolean, - frontFacing: boolean, - sampleIndex: boolean, - sampleMaskIn: boolean, - sampleMaskOut: boolean -): { pipelineDescriptor: GPURenderPipelineDescriptor; code: string } { - const success = testValue <= device.limits.maxInterStageShaderComponents; - - const maxVertexOutputComponents = - device.limits.maxInterStageShaderComponents - (pointList ? 1 : 0); - const maxFragmentInputComponents = - device.limits.maxInterStageShaderComponents - - (frontFacing ? 1 : 0) - - (sampleIndex ? 1 : 0) - - (sampleMaskIn ? 
1 : 0); - const maxOutputComponents = Math.min(maxVertexOutputComponents, maxFragmentInputComponents); - const maxInterStageVariables = Math.floor(maxOutputComponents / 4); - const maxUserDefinedVertexComponents = Math.floor(maxVertexOutputComponents / 4) * 4; - const maxUserDefinedFragmentComponents = Math.floor(maxFragmentInputComponents / 4) * 4; - - const numInterStageVariables = success ? maxInterStageVariables : maxInterStageVariables + 1; - const numUserDefinedComponents = numInterStageVariables * 4; - - const varyings = ` - ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} - `; - - const code = ` - // test value : ${testValue} - // maxInterStageShaderComponents : ${device.limits.maxInterStageShaderComponents} - // num components in vertex shader : ${numUserDefinedComponents}${ - pointList ? ' + point-list' : '' - } - // num components in fragment shader : ${numUserDefinedComponents}${ - frontFacing ? ' + front-facing' : '' - }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''} - // maxUserDefinedVertexShaderOutputComponents : ${maxUserDefinedVertexComponents} - // maxUserDefinedFragmentShaderInputComponents : ${maxUserDefinedFragmentComponents} - // maxInterStageVariables: : ${maxInterStageVariables} - // num used inter stage variables : ${numInterStageVariables} - - struct VSOut { - @builtin(position) p: vec4f, - ${varyings} - } - struct FSIn { - ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} - ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} - ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} - ${varyings} - } - struct FSOut { - @location(0) color: vec4f, - ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} - } - @vertex fn vs() -> VSOut { - var o: VSOut; - o.p = vec4f(0); - return o; - } - @fragment fn fs(i: FSIn) -> FSOut { - var o: FSOut; - o.color = vec4f(0); - return o; - } - `; - const module = device.createShaderModule({ code }); - const pipelineDescriptor: GPURenderPipelineDescriptor = { - layout: 'auto', - primitive: { - topology: pointList ? 'point-list' : 'triangle-list', - }, - vertex: { - module, - entryPoint: 'vs', - }, - fragment: { - module, - entryPoint: 'fs', - targets: [ - { - format: 'rgba8unorm', - }, - ], - }, - }; - return { pipelineDescriptor, code }; -} - -const limit = 'maxInterStageShaderComponents'; -export const { g, description } = makeLimitTestGroup(limit); - -g.test('createRenderPipeline,at_over') - .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params( - kMaximumLimitBaseParams - .combine('async', [false, true]) - .combine('pointList', [false, true]) - .combine('frontFacing', [false, true]) - .combine('sampleIndex', [false, true]) - .combine('sampleMaskIn', [false, true]) - .combine('sampleMaskOut', [false, true]) - ) - .beforeAllSubcases(t => { - if (t.isCompatibility) { - t.skipIf( - t.params.sampleMaskIn || t.params.sampleMaskOut, - 'sample_mask not supported in compatibility mode' - ); - t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); - } - }) - .fn(async t => { - const { - limitTest, - testValueName, - async, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut, - } = t.params; - // Request the largest value of maxInterStageShaderVariables to allow the test using as many - // inter-stage shader components as possible without being limited by - // maxInterStageShaderVariables. 
- const extraLimits: LimitsRequest = { maxInterStageShaderVariables: 'adapterLimit' }; - await t.testDeviceWithRequestedMaximumLimits( - limitTest, - testValueName, - async ({ device, testValue, shouldError }) => { - const { pipelineDescriptor, code } = getPipelineDescriptor( - device, - testValue, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut - ); - - await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError, code); - }, - extraLimits - ); - }); diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts index e54b7f7df178..5298e8c21587 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts @@ -1,26 +1,86 @@ +import { range } from '../../../../../common/util/util.js'; + import { kMaximumLimitBaseParams, makeLimitTestGroup } from './limit_utils.js'; -function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderPipelineDescriptor { +function getPipelineDescriptor( + device: GPUDevice, + testValue: number, + pointList: boolean, + frontFacing: boolean, + sampleIndex: boolean, + sampleMaskIn: boolean, + sampleMaskOut: boolean +): GPURenderPipelineDescriptor { + const vertexOutputVariables = testValue - (pointList ? 1 : 0); + const fragmentInputVariables = testValue - (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0); + const numInterStageVariables = Math.min(vertexOutputVariables, fragmentInputVariables); + + const maxVertexOutputVariables = device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0); + const maxFragmentInputVariables = + device.limits.maxInterStageShaderVariables - + (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0); + const maxInterStageVariables = Math.min(maxVertexOutputVariables, maxFragmentInputVariables); + + const varyings = ` + ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} + `; + const code = ` + // test value : ${testValue} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${pointList ? ' + point-list' : ''} + // num variables in fragment shader : ${fragmentInputVariables}${ + frontFacing ? ' + front-facing' : '' + }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''} + // maxInterStageVariables: : ${maxInterStageVariables} + // num used inter stage variables : ${numInterStageVariables} + struct VSOut { @builtin(position) p: vec4f, - @location(${testValue}) v: f32, + ${varyings} + } + struct FSIn { + ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} + ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} + ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} } @vertex fn vs() -> VSOut { var o: VSOut; o.p = vec4f(0); - o.v = 1.0; + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); return o; } `; const module = device.createShaderModule({ code }); - return { + const pipelineDescriptor: GPURenderPipelineDescriptor = { layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, vertex: { module, entryPoint: 'vs', }, + fragment: { + module, + entryPoint: 'fs', + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, }; + return pipelineDescriptor; } const limit = 'maxInterStageShaderVariables'; @@ -28,15 +88,48 @@ export const { g, description } = makeLimitTestGroup(limit); g.test('createRenderPipeline,at_over') .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params(kMaximumLimitBaseParams.combine('async', [false, true])) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('frontFacing', [false, true]) + .combine('sampleIndex', [false, true]) + .combine('sampleMaskIn', [false, true]) + .combine('sampleMaskOut', [false, true]) + ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.sampleMaskIn || t.params.sampleMaskOut, + 'sample_mask not supported in compatibility mode' + ); + t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); + } + }) .fn(async t => { - const { limitTest, testValueName, async } = t.params; + const { + limitTest, + testValueName, + async, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut, + } = t.params; await t.testDeviceWithRequestedMaximumLimits( limitTest, testValueName, async ({ device, testValue, shouldError }) => { - const lastIndex = testValue - 1; - const pipelineDescriptor = getPipelineDescriptor(device, lastIndex); + const pipelineDescriptor = getPipelineDescriptor( + device, + testValue, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut + ); await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts index 9e5aaa144bfa..b37cc9230931 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, lastIndex: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts index 0af5724f2a2a..be9c7ffd7f7b 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts @@ -32,6 +32,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts index 9a4108cb0c08..02701de0d1e9 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP module, buffers, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git 
a/src/webgpu/api/validation/createView.spec.ts b/src/webgpu/api/validation/createView.spec.ts index 56a603b714e8..c3e56bb4f011 100644 --- a/src/webgpu/api/validation/createView.spec.ts +++ b/src/webgpu/api/validation/createView.spec.ts @@ -6,8 +6,10 @@ import { unreachable } from '../../../common/util/util.js'; import { kTextureAspects, kTextureDimensions, + kTextureUsages, kTextureViewDimensions, } from '../../capability_info.js'; +import { GPUConst } from '../../constants.js'; import { kTextureFormatInfo, kAllTextureFormats, @@ -339,3 +341,73 @@ g.test('texture_state') texture.createView(); }, state === 'invalid'); }); + +g.test('texture_view_usage') + .desc( + `Test texture view usage (single, combined, inherited) for every texture format and texture usage` + ) + .params(u => + u // + .combine('format', kAllTextureFormats) + .combine('textureUsage0', kTextureUsages) + .combine('textureUsage1', kTextureUsages) + .filter(({ format, textureUsage0, textureUsage1 }) => { + const info = kTextureFormatInfo[format]; + const textureUsage = textureUsage0 | textureUsage1; + + if ( + (textureUsage & GPUConst.TextureUsage.RENDER_ATTACHMENT) !== 0 && + info.color && + !info.colorRender + ) { + return false; + } + + return true; + }) + .beginSubcases() + .combine('textureViewUsage0', [0, ...kTextureUsages]) + .combine('textureViewUsage1', [0, ...kTextureUsages]) + ) + .beforeAllSubcases(t => { + const { format, textureUsage0, textureUsage1 } = t.params; + const info = kTextureFormatInfo[format]; + const textureUsage = textureUsage0 | textureUsage1; + t.skipIfTextureFormatNotSupported(format); + t.selectDeviceOrSkipTestCase(info.feature); + if (textureUsage & GPUTextureUsage.STORAGE_BINDING) { + t.skipIfTextureFormatNotUsableAsStorageTexture(format); + } + }) + .fn(t => { + const { format, textureUsage0, textureUsage1, textureViewUsage0, textureViewUsage1 } = t.params; + const info = kTextureFormatInfo[format]; + + const size = [info.blockWidth, info.blockHeight, 1]; + const dimension = '2d'; + const mipLevelCount = 1; + const usage = textureUsage0 | textureUsage1; + + const textureDescriptor: GPUTextureDescriptor = { + size, + mipLevelCount, + dimension, + format, + usage, + }; + + const texture = t.createTextureTracked(textureDescriptor); + + let success = true; + + const textureViewUsage = textureViewUsage0 | textureViewUsage1; + + // Texture view usage must be a subset of texture usage + if ((~usage & textureViewUsage) !== 0) success = false; + + t.expectValidationError(() => { + texture.createView({ + usage: textureViewUsage, + }); + }, !success); + }); diff --git a/src/webgpu/api/validation/layout_shader_compat.spec.ts b/src/webgpu/api/validation/layout_shader_compat.spec.ts index 2b5e609c55d6..5ee16510c77a 100644 --- a/src/webgpu/api/validation/layout_shader_compat.spec.ts +++ b/src/webgpu/api/validation/layout_shader_compat.spec.ts @@ -253,6 +253,7 @@ g.test('pipeline_layout_shader_exact_match') code: vertexShader, }), }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); break; } diff --git a/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts b/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts new file mode 100644 index 000000000000..ed387b5a8769 --- /dev/null +++ b/src/webgpu/api/validation/render_pipeline/float32_blendable.spec.ts @@ -0,0 +1,47 @@ +export const description = ` +Tests for capabilities added by float32-blendable flag. 
+`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { ColorTextureFormat } from '../../../format_info.js'; + +import { CreateRenderPipelineValidationTest } from './common.js'; + +export const g = makeTestGroup(CreateRenderPipelineValidationTest); + +const kFloat32Formats: ColorTextureFormat[] = ['r32float', 'rg32float', 'rgba32float']; + +g.test('create_render_pipeline') + .desc( + ` +Tests that the float32-blendable feature is required to create a render +pipeline that uses blending with any float32-format attachment. +` + ) + .params(u => + u + .combine('isAsync', [false, true]) + .combine('enabled', [true, false] as const) + .beginSubcases() + .combine('hasBlend', [true, false] as const) + .combine('format', kFloat32Formats) + ) + .beforeAllSubcases(t => { + if (t.params.enabled) { + t.selectDeviceOrSkipTestCase('float32-blendable'); + } + }) + .fn(t => { + const { isAsync, enabled, hasBlend, format } = t.params; + + const descriptor = t.getDescriptor({ + targets: [ + { + format, + blend: hasBlend ? { color: {}, alpha: {} } : undefined, + }, + ], + }); + + t.doCreateRenderPipelineTest(isAsync, enabled || !hasBlend, descriptor); + }); diff --git a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts index 1a8dec37464f..a3af6d675ac3 100644 --- a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts @@ -273,39 +273,29 @@ g.test('max_shader_variable_location') t.doCreateRenderPipelineTest(isAsync, location < maxInterStageShaderVariables, descriptor); }); -g.test('max_components_count,output') +g.test('max_variables_count,output') .desc( - `Tests that validation should fail when scalar components of all user-defined outputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined outputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined output scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. 
- { numScalarDelta: 0, topology: 'triangle-list', _success: true }, - { numScalarDelta: 1, topology: 'triangle-list', _success: false }, - { numScalarDelta: 0, topology: 'point-list', _success: false }, - { numScalarDelta: -1, topology: 'point-list', _success: false }, - { numScalarDelta: -3, topology: 'point-list', _success: false }, - { numScalarDelta: -4, topology: 'point-list', _success: true }, + // Number of user-defined output variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, topology: 'triangle-list', _success: true }, + { numVariablesDelta: 1, topology: 'triangle-list', _success: false }, + { numVariablesDelta: 0, topology: 'point-list', _success: false }, + { numVariablesDelta: -1, topology: 'point-list', _success: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, topology, _success } = t.params; + const { isAsync, numVariablesDelta, topology, _success } = t.params; - const numScalarComponents = t.device.limits.maxInterStageShaderComponents + numScalarDelta; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - const descriptor = t.getDescriptorWithStates( t.getVertexStateWithOutputs(outputs), t.getFragmentStateWithInputs(inputs) @@ -315,42 +305,32 @@ g.test('max_components_count,output') t.doCreateRenderPipelineTest(isAsync, _success, descriptor); }); -g.test('max_components_count,input') +g.test('max_variables_count,input') .desc( - `Tests that validation should fail when scalar components of all user-defined inputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined inputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined input scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. - { numScalarDelta: 0, useExtraBuiltinInputs: false }, - { numScalarDelta: 1, useExtraBuiltinInputs: false }, - { numScalarDelta: 0, useExtraBuiltinInputs: true }, - { numScalarDelta: -3, useExtraBuiltinInputs: true }, - { numScalarDelta: -4, useExtraBuiltinInputs: true }, + // Number of user-defined input variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, useExtraBuiltinInputs: false }, + { numVariablesDelta: 1, useExtraBuiltinInputs: false }, + { numVariablesDelta: 0, useExtraBuiltinInputs: true }, + { numVariablesDelta: -1, useExtraBuiltinInputs: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, useExtraBuiltinInputs } = t.params; + const { isAsync, numVariablesDelta, useExtraBuiltinInputs } = t.params; - const numScalarComponents = - Math.floor((t.device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4; - const numExtraComponents = useExtraBuiltinInputs ? (t.isCompatibility ? 2 : 3) : 0; - const numUsedComponents = numScalarComponents + numExtraComponents; - const success = numUsedComponents <= t.device.limits.maxInterStageShaderComponents; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; + const numExtraVariables = useExtraBuiltinInputs ? 1 : 0; + const numUsedVariables = numVec4 + numExtraVariables; + const success = numUsedVariables <= t.device.limits.maxInterStageShaderVariables; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - if (useExtraBuiltinInputs) { inputs.push('@builtin(front_facing) front_facing_in: bool'); if (!t.isCompatibility) { diff --git a/src/webgpu/api/validation/render_pipeline/misc.spec.ts b/src/webgpu/api/validation/render_pipeline/misc.spec.ts index 861eb4d24c7f..d10c7ca99985 100644 --- a/src/webgpu/api/validation/render_pipeline/misc.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/misc.spec.ts @@ -36,7 +36,7 @@ g.test('no_attachment') g.test('vertex_state_only') .desc( `Tests creating vertex-state-only render pipeline. A vertex-only render pipeline has no fragment -state (and thus has no color state), and can be created with or without depth stencil state.` +state (and thus has no color state), and must have a depth-stencil state, since at least one attachment is required.` ) .params(u => u @@ -76,7 +76,7 @@ state (and thus has no color state), and can be created with or without depth st targets: hasColor ?
[{ format: 'rgba8unorm' }] : [], }); - t.doCreateRenderPipelineTest(isAsync, true, descriptor); + t.doCreateRenderPipelineTest(isAsync, depthStencilState !== undefined, descriptor); }); g.test('pipeline_layout,device_mismatch') diff --git a/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts b/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts index 2fbb52f36694..6b65a620fc11 100644 --- a/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts +++ b/src/webgpu/api/validation/resource_usages/texture/in_render_common.spec.ts @@ -180,11 +180,21 @@ g.test('subresources,color_attachment_and_bind_group') { bgLayer: 0, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 2 }, + { bgLayer: 0, bgLayerCount: kTextureLayers }, ]) .combine('bgUsage', kTextureBindingTypes) .unless(t => t.bgUsage !== 'sampled-texture' && t.bgLevelCount > 1) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf(t.params.bgLayer !== 0, 'view base array layer must equal 0 in compatibility mode'); + t.skipIf( + t.params.bgLayerCount !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { colorAttachmentLevel, @@ -288,6 +298,7 @@ g.test('subresources,depth_stencil_attachment_and_bind_group') { bgLayer: 0, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 1 }, { bgLayer: 1, bgLayerCount: 2 }, + { bgLayer: 0, bgLayerCount: kTextureLayers }, ]) .beginSubcases() .combine('depthReadOnly', [true, false]) @@ -295,6 +306,15 @@ g.test('subresources,depth_stencil_attachment_and_bind_group') .combine('bgAspect', ['depth-only', 'stencil-only'] as const) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf(t.params.bgLayer !== 0, 'view base array layer must equal 0 in compatibility mode'); + t.skipIf( + t.params.bgLayerCount !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { dsLevel, @@ -411,6 +431,7 @@ g.test('subresources,multiple_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('bg1Levels', [ { base: 0, count: 1 }, @@ -421,6 +442,7 @@ g.test('subresources,multiple_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('bgUsage0', kTextureBindingTypes) .combine('bgUsage1', kTextureBindingTypes) @@ -432,6 +454,18 @@ g.test('subresources,multiple_bind_groups') .beginSubcases() .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.bg0Layers.base !== 0 || t.params.bg1Layers.base !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.bg0Layers.count !== kTextureLayers || t.params.bg1Layers.count !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { bg0Levels, bg0Layers, bg1Levels, bg1Layers, bgUsage0, bgUsage1, inSamePass } = t.params; @@ -524,6 +558,7 @@ g.test('subresources,depth_stencil_texture_in_bind_groups') { base: 0, count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('view1Levels', [ { base: 0, count: 1 }, @@ -534,11 +569,25 @@ g.test('subresources,depth_stencil_texture_in_bind_groups') { base: 0, 
count: 1 }, { base: 1, count: 1 }, { base: 1, count: 2 }, + { base: 0, count: kTextureLayers }, ]) .combine('aspect0', ['depth-only', 'stencil-only'] as const) .combine('aspect1', ['depth-only', 'stencil-only'] as const) .combine('inSamePass', [true, false]) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.view0Layers.base !== 0 || t.params.view1Layers.base !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.view0Layers.count !== kTextureLayers || + t.params.view1Layers.count !== kTextureLayers, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const { view0Levels, view0Layers, view1Levels, view1Layers, aspect0, aspect1, inSamePass } = t.params; diff --git a/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts b/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts index 05f048ac050c..a601fafec2fc 100644 --- a/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts +++ b/src/webgpu/api/validation/resource_usages/texture/in_render_misc.spec.ts @@ -4,6 +4,7 @@ Texture Usages Validation Tests on All Kinds of WebGPU Subresource Usage Scopes. import { makeTestGroup } from '../../../../../common/framework/test_group.js'; import { unreachable } from '../../../../../common/util/util.js'; +import { kTextureUsages } from '../../../../capability_info.js'; import { ValidationTest } from '../../validation_test.js'; import { TextureBindingType, @@ -571,3 +572,79 @@ g.test('subresources,texture_usages_in_copy_and_render_pass') encoder.finish(); }, false); }); + +g.test('subresources,texture_view_usages') + .desc( + ` + Test that the usages of the texture view are used to validate compatibility in command encoding + instead of the usages of the base texture.` + ) + .params(u => + u + .combine('bindingType', ['color-attachment', ...kTextureBindingTypes] as const) + .combine('viewUsage', [0, ...kTextureUsages]) + ) + .fn(t => { + const { bindingType, viewUsage } = t.params; + + const texture = t.createTextureTracked({ + format: 'r32float', + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.TEXTURE_BINDING | + GPUTextureUsage.STORAGE_BINDING | + GPUTextureUsage.RENDER_ATTACHMENT, + size: [kTextureSize, kTextureSize, 1], + ...(t.isCompatibility && { + textureBindingViewDimension: '2d-array', + }), + }); + + switch (bindingType) { + case 'color-attachment': { + const encoder = t.device.createCommandEncoder(); + const renderPassEncoder = encoder.beginRenderPass({ + colorAttachments: [ + { view: texture.createView({ usage: viewUsage }), loadOp: 'load', storeOp: 'store' }, + ], + }); + renderPassEncoder.end(); + + const success = viewUsage === 0 || (viewUsage & GPUTextureUsage.RENDER_ATTACHMENT) !== 0; + + t.expectValidationError(() => { + encoder.finish(); + }, !success); + break; + } + case 'sampled-texture': + case 'readonly-storage-texture': + case 'writeonly-storage-texture': + case 'readwrite-storage-texture': + { + let success = true; + if (viewUsage !== 0) { + if (bindingType === 'sampled-texture') { + if ((viewUsage & GPUTextureUsage.TEXTURE_BINDING) === 0) success = false; + } else { + if ((viewUsage & GPUTextureUsage.STORAGE_BINDING) === 0) success = false; + } + } + + t.expectValidationError(() => { + t.createBindGroupForTest( + texture.createView({ + dimension: '2d-array', + usage: viewUsage, + }), + bindingType, + 'unfilterable-float' + ); + }, !success); + } + break; + default: + 
unreachable(); + } + }); diff --git a/src/webgpu/api/validation/shader_module/entry_point.spec.ts b/src/webgpu/api/validation/shader_module/entry_point.spec.ts index c956dc302144..67dbef1851e1 100644 --- a/src/webgpu/api/validation/shader_module/entry_point.spec.ts +++ b/src/webgpu/api/validation/shader_module/entry_point.spec.ts @@ -128,6 +128,7 @@ and check that the APIs only accept matching entryPoint. module: t.device.createShaderModule({ code }), entryPoint, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; let _success = true; if (shaderModuleStage !== 'vertex') { @@ -258,6 +259,7 @@ an undefined entryPoint is valid if there's an extra shader stage. }), entryPoint: undefined, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; const success = extraShaderModuleStage !== 'vertex'; diff --git a/src/webgpu/capability_info.ts b/src/webgpu/capability_info.ts index 24a103dfb4b1..5af80c45fa4c 100644 --- a/src/webgpu/capability_info.ts +++ b/src/webgpu/capability_info.ts @@ -730,7 +730,6 @@ const [kLimitInfoKeys, kLimitInfoDefaults, kLimitInfoData] = 'maxBufferSize': [ , 268435456, 268435456, kMaxUnsignedLongLongValue], 'maxVertexAttributes': [ , 16, 16, ], 'maxVertexBufferArrayStride': [ , 2048, 2048, ], - 'maxInterStageShaderComponents': [ , 64, 60, ], 'maxInterStageShaderVariables': [ , 16, 15, ], 'maxColorAttachments': [ , 8, 4, ], @@ -818,19 +817,22 @@ export const kFeatureNameInfo: { readonly [k in GPUFeatureName]: {}; } = /* prettier-ignore */ { - 'bgra8unorm-storage': {}, - 'depth-clip-control': {}, - 'depth32float-stencil8': {}, - 'texture-compression-bc': {}, - 'texture-compression-etc2': {}, - 'texture-compression-astc': {}, - 'timestamp-query': {}, - 'indirect-first-instance': {}, - 'shader-f16': {}, - 'rg11b10ufloat-renderable': {}, - 'float32-filterable': {}, - 'clip-distances': {}, - 'dual-source-blending': {}, + 'bgra8unorm-storage': {}, + 'depth-clip-control': {}, + 'depth32float-stencil8': {}, + 'texture-compression-bc': {}, + 'texture-compression-bc-sliced-3d': {}, + 'texture-compression-etc2': {}, + 'texture-compression-astc': {}, + 'texture-compression-astc-sliced-3d': {}, + 'timestamp-query': {}, + 'indirect-first-instance': {}, + 'shader-f16': {}, + 'rg11b10ufloat-renderable': {}, + 'float32-filterable': {}, + 'float32-blendable': {}, + 'clip-distances': {}, + 'dual-source-blending': {}, }; /** List of all GPUFeatureName values. 
*/ export const kFeatureNames = keysOf(kFeatureNameInfo); diff --git a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts index 805203870903..a74e8900904c 100644 --- a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts +++ b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts @@ -232,6 +232,7 @@ g.test('unsupportedStorageTextureFormats,renderPipeline') t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); @@ -267,7 +268,7 @@ g.test('textureLoad_with_depth_textures,computePipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'csWithoutDepthUsage'; t.doCreateComputePipelineTest(async, isValid, { layout: 'auto', compute: { module, entryPoint }, @@ -301,9 +302,10 @@ g.test('textureLoad_with_depth_textures,renderPipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'vsWithoutDepthUsage'; t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index 85fe0bdc6a8b..c455224da996 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -273,6 +273,7 @@ "webgpu:api,validation,buffer,mapping:unmap,state,mappedAtCreation:*": { "subcaseMS": 8.950 }, "webgpu:api,validation,buffer,mapping:unmap,state,mappingPending:*": { "subcaseMS": 22.951 }, "webgpu:api,validation,buffer,mapping:unmap,state,unmapped:*": { "subcaseMS": 74.200 }, + "webgpu:api,validation,capability_checks,features,clip_distances:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 }, "webgpu:api,validation,capability_checks,features,query_types:createQuerySet:*": { "subcaseMS": 10.451 }, "webgpu:api,validation,capability_checks,features,query_types:timestamp:*": { "subcaseMS": 1.200 }, "webgpu:api,validation,capability_checks,features,texture_formats:canvas_configuration:*": { "subcaseMS": 4.339 }, @@ -313,7 +314,6 @@ "webgpu:api,validation,capability_checks,limits,maxComputeWorkgroupsPerDimension:validate:*": { "subcaseMS": 138.900 }, "webgpu:api,validation,capability_checks,limits,maxDynamicStorageBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 15.680 }, "webgpu:api,validation,capability_checks,limits,maxDynamicUniformBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 10.268 }, - "webgpu:api,validation,capability_checks,limits,maxInterStageShaderComponents:createRenderPipeline,at_over:*": { "subcaseMS": 12.916 }, "webgpu:api,validation,capability_checks,limits,maxInterStageShaderVariables:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 }, "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createBindGroupLayout,at_over:*": { "subcaseMS": 47.857 }, "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createPipeline,at_over:*": { "subcaseMS": 45.611 }, @@ -728,9 +728,9 @@ "webgpu:api,validation,render_pipeline,inter_stage:location,mismatch:*": { "subcaseMS": 7.280 }, "webgpu:api,validation,render_pipeline,inter_stage:location,subset:*": { "subcaseMS": 1.250 }, 
"webgpu:api,validation,render_pipeline,inter_stage:location,superset:*": { "subcaseMS": 0.901 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,input:*": { "subcaseMS": 6.560 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:max_shader_variable_location:*": { "subcaseMS": 11.050 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,input:*": { "subcaseMS": 6.560 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:type:*": { "subcaseMS": 6.170 }, "webgpu:api,validation,render_pipeline,misc:basic:*": { "subcaseMS": 0.901 }, "webgpu:api,validation,render_pipeline,misc:external_texture:*": { "subcaseMS": 35.189 }, @@ -862,7 +862,6 @@ "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,used:*": { "subcaseMS": 0.000 }, "webgpu:compat,api,validation,render_pipeline,depth_stencil_state:depthBiasClamp:*": { "subcaseMS": 1.604 }, "webgpu:compat,api,validation,render_pipeline,fragment_state:colorState:*": { "subcaseMS": 32.604 }, - "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:interpolate:*": { "subcaseMS": 3.488 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_index:*": { "subcaseMS": 0.487 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_mask:*": { "subcaseMS": 0.408 }, @@ -870,6 +869,7 @@ "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:textureLoad_with_depth_textures,renderPipeline:*": { "subcaseMS": 1.259 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,computePipeline:*": { "subcaseMS": 1.206 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,renderPipeline:*": { "subcaseMS": 1.206 }, + "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,texture,createTexture:depthOrArrayLayers_incompatible_with_textureBindingViewDimension:*": { "subcaseMS": 12.712 }, "webgpu:compat,api,validation,texture,createTexture:format_reinterpretation:*": { "subcaseMS": 7.012 }, "webgpu:compat,api,validation,texture,createTexture:invalidTextureBindingViewDimension:*": { "subcaseMS": 6.022 }, @@ -1471,6 +1471,16 @@ "webgpu:shader,execution,expression,call,builtin,pow:abstract_float:*": { "subcaseMS": 30535.000 }, "webgpu:shader,execution,expression,call,builtin,pow:f16:*": { "subcaseMS": 816.063 }, "webgpu:shader,execution,expression,call,builtin,pow:f32:*": { "subcaseMS": 151.269 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:compute,all_active:*": { "subcaseMS": 734.565 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:compute,split:*": { "subcaseMS": 2001.058 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:data_types:*": { "subcaseMS": 29.144 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:fragment,all_active:*": { "subcaseMS": 317.251 }, + "webgpu:shader,execution,expression,call,builtin,quadBroadcast:fragment,split:*": { "subcaseMS": 0.206 }, + 
"webgpu:shader,execution,expression,call,builtin,quadSwap:compute,all_active:*": { "subcaseMS": 1658.818 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:compute,split:*": { "subcaseMS": 3082.458 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:data_types:*": { "subcaseMS": 1532.129 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:fragment,all_active:*": { "subcaseMS": 28.025 }, + "webgpu:shader,execution,expression,call,builtin,quadSwap:fragment,split:*": { "subcaseMS": 0.542 }, "webgpu:shader,execution,expression,call,builtin,quantizeToF16:f32:*": { "subcaseMS": 11.063 }, "webgpu:shader,execution,expression,call,builtin,radians:abstract_float:*": { "subcaseMS": 12268.988 }, "webgpu:shader,execution,expression,call,builtin,radians:f16:*": { "subcaseMS": 18.707 }, @@ -1525,15 +1535,34 @@ "webgpu:shader,execution,expression,call,builtin,step:f32:*": { "subcaseMS": 291.363 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:barrier:*": { "subcaseMS": 0.801 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:stage:*": { "subcaseMS": 2.402 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:compute,split:*": { "subcaseMS": 2853.671 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,all_active:*": { "subcaseMS": 0.172 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,split:*": { "subcaseMS": 0.327 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,all_active:*": { "subcaseMS": 0.227 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,split:*": { "subcaseMS": 0.309 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:compute,all_active:*": { "subcaseMS": 1251.161 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:compute,split:*": { "subcaseMS": 1743.045 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:data_types:*": { "subcaseMS": 5081.792 }, + "webgpu:shader,execution,expression,call,builtin,subgroupBitwise:fragment,all_active:*": { "subcaseMS": 9079.446 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 }, - 
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:fragment:*": { "subcaseMS": 0.263 }, "webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 }, "webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 }, "webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 }, @@ -1554,8 +1583,8 @@ "webgpu:shader,execution,expression,call,builtin,textureGather:sampled_array_3d_coords:*": { "subcaseMS": 60.700 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_2d_coords:*": { "subcaseMS": 291.301 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_3d_coords:*": { "subcaseMS": 191.101 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_2d_coords:*": { "subcaseMS": 57.600 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_3d_coords:*": { "subcaseMS": 10.101 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_2d_coords:*": { "subcaseMS": 57.600 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_3d_coords:*": { "subcaseMS": 10.101 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:arrayed:*": { "subcaseMS": 30.501 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:depth:*": { "subcaseMS": 3.200 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:external:*": { "subcaseMS": 1.401 }, @@ -1579,7 +1608,6 @@ "webgpu:shader,execution,expression,call,builtin,textureSample:depth_array_2d_coords:*": { "subcaseMS": 92.601 }, "webgpu:shader,execution,expression,call,builtin,textureSample:depth_array_3d_coords:*": { "subcaseMS": 20.301 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_1d_coords:*": { "subcaseMS": 1.200 }, - "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_2d_coords,derivatives:*": { "subcaseMS": 0.091 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_2d_coords:*": { "subcaseMS": 12.500 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_3d_coords:*": { "subcaseMS": 36.002 }, "webgpu:shader,execution,expression,call,builtin,textureSample:sampled_array_2d_coords:*": { "subcaseMS": 92.500 }, @@ -1597,8 +1625,6 @@ "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:3d_coords:*": { "subcaseMS": 10.301 }, "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:arrayed_2d_coords:*": { "subcaseMS": 705.100 }, "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:arrayed_3d_coords:*": { "subcaseMS": 622.700 }, - "webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:control_flow:*": { "subcaseMS": 2.202 }, - 
"webgpu:shader,execution,expression,call,builtin,textureSampleCompareLevel:stage:*": { "subcaseMS": 7.901 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_2d_coords:*": { "subcaseMS": 82.401 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_3d_coords:*": { "subcaseMS": 309.101 }, "webgpu:shader,execution,expression,call,builtin,textureSampleGrad:sampled_array_2d_coords:*": { "subcaseMS": 352.900 }, @@ -1849,6 +1875,8 @@ "webgpu:shader,execution,padding:array_of_matCx3:*": { "subcaseMS": 8.650 }, "webgpu:shader,execution,padding:array_of_struct:*": { "subcaseMS": 5.801 }, "webgpu:shader,execution,padding:array_of_vec3:*": { "subcaseMS": 10.500 }, + "webgpu:shader,execution,padding:array_of_vec3h,elementwise:*": { "subcaseMS": 24.607 }, + "webgpu:shader,execution,padding:array_of_vec3h:*": { "subcaseMS": 26.941 }, "webgpu:shader,execution,padding:matCx3:*": { "subcaseMS": 10.050 }, "webgpu:shader,execution,padding:struct_explicit:*": { "subcaseMS": 12.000 }, "webgpu:shader,execution,padding:struct_implicit:*": { "subcaseMS": 33.201 }, @@ -2005,6 +2033,11 @@ "webgpu:shader,validation,expression,binary,div_rem:scalar_vector:*": { "subcaseMS": 743.721 }, "webgpu:shader,validation,expression,binary,div_rem:scalar_vector_out_of_range:*": { "subcaseMS": 650.727 }, "webgpu:shader,validation,expression,binary,parse:all:*": { "subcaseMS": 527.287 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_array_count_on_rhs:*": { "subcaseMS": 4.309 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_const:*": { "subcaseMS": 4.341 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_override:*": { "subcaseMS": 27.490 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_types:*": { "subcaseMS": 13.409 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:scalar_vector:*": { "subcaseMS": 397.769 }, "webgpu:shader,validation,expression,call,builtin,abs:parameters:*": { "subcaseMS": 10.133 }, "webgpu:shader,validation,expression,call,builtin,abs:values:*": { "subcaseMS": 0.391 }, "webgpu:shader,validation,expression,call,builtin,acos:integer_argument:*": { "subcaseMS": 1.512 }, @@ -2226,6 +2259,22 @@ "webgpu:shader,validation,expression,call,builtin,pow:invalid_argument:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:data_type:*": { "subcaseMS": 39.783 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:early_eval:*": { "subcaseMS": 63.825 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_constness:*": { "subcaseMS": 15.347 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_type:*": { "subcaseMS": 26.268 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:must_use:*": { "subcaseMS": 41.658 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups:*": { "subcaseMS": 42.565 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups_f16:*": { "subcaseMS": 44.998 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:return_type:*": { "subcaseMS": 363.607 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:stage:*": { "subcaseMS": 3.050 
}, + "webgpu:shader,validation,expression,call,builtin,quadSwap:data_type:*": { "subcaseMS": 89.379 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:early_eval:*": { "subcaseMS": 108.243 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:must_use:*": { "subcaseMS": 5.557 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups:*": { "subcaseMS": 113.624 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups_f16:*": { "subcaseMS": 12.712 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:return_type:*": { "subcaseMS": 1424.551 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:stage:*": { "subcaseMS": 7.664 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:values:*": { "subcaseMS": 1.000 }, @@ -2278,16 +2327,73 @@ "webgpu:shader,validation,expression,call,builtin,step:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:data_type:*": { "subcaseMS": 32.897 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:early_eval:*": { "subcaseMS": 101.800 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:invalid_types:*": { "subcaseMS": 95.889 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:must_use:*": { "subcaseMS": 62.933 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:return_type:*": { "subcaseMS": 363.546 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:stage:*": { "subcaseMS": 3.536 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:data_type:*": { "subcaseMS": 57.943 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:early_eval:*": { "subcaseMS": 173.714 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:must_use:*": { "subcaseMS": 4.592 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:requires_subgroups:*": { "subcaseMS": 73.866 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:return_type:*": { "subcaseMS": 39.388 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:stage:*": { "subcaseMS": 6.862 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:data_type:*": { "subcaseMS": 115.557 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:early_eval:*": { "subcaseMS": 52.992 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:must_use:*": { "subcaseMS": 39.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:requires_subgroups:*": { "subcaseMS": 36.819 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:return_type:*": { "subcaseMS": 22.381 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:stage:*": { "subcaseMS": 3.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:data_type:*": { "subcaseMS": 94.072 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:early_eval:*": { "subcaseMS": 569.598 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:must_use:*": { "subcaseMS": 6.172 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupBitwise:requires_subgroups:*": { "subcaseMS": 108.478 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:return_type:*": { "subcaseMS": 1430.736 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:stage:*": { "subcaseMS": 11.858 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:data_type:*": { "subcaseMS": 97.991 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:early_eval:*": { "subcaseMS": 1.254 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_constness:*": { "subcaseMS": 7.026 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_type:*": { "subcaseMS": 24.703 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups:*": { "subcaseMS": 47.231 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups_f16:*": { "subcaseMS": 38.503 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:data_type:*": { "subcaseMS": 32.168 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:early_eval:*": { "subcaseMS": 57.922 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:must_use:*": { "subcaseMS": 36.296 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups:*": { "subcaseMS": 42.522 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups_f16:*": { "subcaseMS": 47.111 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:return_type:*": { "subcaseMS": 402.558 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:stage:*": { "subcaseMS": 2.869 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:data_type:*": { "subcaseMS": 72.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:early_eval:*": { "subcaseMS": 56.115 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:must_use:*": { "subcaseMS": 32.820 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:requires_subgroups:*": { "subcaseMS": 35.595 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:return_type:*": { "subcaseMS": 22.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:stage:*": { "subcaseMS": 3.790 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:data_type:*": { "subcaseMS": 64.143 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:early_eval:*": { "subcaseMS": 551.671 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:must_use:*": { "subcaseMS": 4.403 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups:*": { "subcaseMS": 87.208 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups_f16:*": { "subcaseMS": 25.190 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:return_type:*": { "subcaseMS": 911.454 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:stage:*": { "subcaseMS": 6.395 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupMul:data_type:*": { "subcaseMS": 45.396 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:early_eval:*": { "subcaseMS": 56.571 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:invalid_types:*": { "subcaseMS": 91.040 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:must_use:*": { "subcaseMS": 39.041 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:return_type:*": { "subcaseMS": 549.172 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:stage:*": { "subcaseMS": 4.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:data_type:*": { "subcaseMS": 115.093 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:early_eval:*": { "subcaseMS": 110.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:must_use:*": { "subcaseMS": 7.628 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_type:*": { "subcaseMS": 88.305 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups:*": { "subcaseMS": 102.779 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups_f16:*": { "subcaseMS": 13.121 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:return_type:*": { "subcaseMS": 1930.309 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:stage:*": { "subcaseMS": 9.527 }, "webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 }, "webgpu:shader,validation,expression,call,builtin,tan:must_use:*": { "subcaseMS": 5.401 }, "webgpu:shader,validation,expression,call,builtin,tan:values:*": { "subcaseMS": 0.350 }, diff --git a/src/webgpu/print_environment.spec.ts b/src/webgpu/print_environment.spec.ts index 9790c770cefa..f3ca67d3a2c0 100644 --- a/src/webgpu/print_environment.spec.ts +++ b/src/webgpu/print_environment.spec.ts @@ -35,9 +35,7 @@ NOTE: If your test runtime elides logs when tests pass, you won't see the prints in the logs. On non-WPT runtimes, it will also print to the console with console.log. WPT disallows console.log and doesn't support logs on passing tests, so this does nothing on WPT.` ) - .fn(async t => { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. 
- const adapterInfo = t.adapter.info || (await t.adapter.requestAdapterInfo()); + .fn(t => { const isCompatibilityMode = (t.adapter as unknown as { isCompatibilityMode?: boolean }) .isCompatibilityMode; @@ -51,7 +49,7 @@ WPT disallows console.log and doesn't support logs on passing tests, so this doe adapter: { isFallbackAdapter: t.adapter.isFallbackAdapter, isCompatibilityMode, - info: adapterInfo, + info: t.adapter.info, features: Array.from(t.adapter.features), limits: t.adapter.limits, }, diff --git a/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts b/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts index f6fd05b46fcb..b8872eeab99f 100644 --- a/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts +++ b/src/webgpu/shader/execution/expression/access/matrix/index.spec.ts @@ -11,7 +11,9 @@ import { abstractFloat, f32, vec, + Value, } from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; import { Case } from '../../case.js'; import { allInputSources, basicExpressionBuilder, run } from '../../expression.js'; @@ -198,3 +200,73 @@ g.test('abstract_float_element') cases ); }); + +g.test('non_const_index') + .specURL('https://www.w3.org/TR/WGSL/#matrix-access-expr') + .desc(`Test indexing of a matrix using non-const index`) + .params(u => u.combine('columns', [2, 3, 4] as const).combine('rows', [2, 3, 4] as const)) + .fn(t => { + const cols = t.params.columns; + const rows = t.params.rows; + const values = Array.from(Array(cols * rows).keys()); + const wgsl = ` +@group(0) @binding(0) var<storage, read_write> output : array<f32>; + +@compute @workgroup_size(${cols}, ${rows}) +fn main(@builtin(local_invocation_id) invocation_id : vec3<u32>) { + let m = mat${cols}x${rows}f(${values.join(', ')}); + output[invocation_id.x*${rows} + invocation_id.y] = m[invocation_id.x][invocation_id.y]; +} +`; + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ code: wgsl }), + entryPoint: 'main', + }, + }); + + const bufferSize = (arr: Value[]) => { + let offset = 0; + let alignment = 0; + for (const value of arr) { + alignment = Math.max(alignment, value.type.alignment); + offset = align(offset, value.type.alignment) + value.type.size; + } + return align(offset, alignment); + }; + + const toArray = (arr: Value[]) => { + const array = new Uint8Array(bufferSize(arr)); + let offset = 0; + for (const value of arr) { + offset = align(offset, value.type.alignment); + value.copyTo(array, offset); + offset += value.type.size; + } + return array; + }; + + const expected = values.map(i => Type['f32'].create(i)); + + const outputBuffer = t.createBufferTracked({ + size: bufferSize(expected), + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer: outputBuffer } }], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.dispatchWorkgroups(1); + pass.end(); + t.queue.submit([encoder.finish()]); + + t.expectGPUBufferValuesEqual(outputBuffer, toArray(expected)); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts new file mode 100644 index 000000000000..29a3ec47c860 --- /dev/null +++
b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts @@ -0,0 +1,656 @@ +export const description = ` +Execution tests for quadBroadcast. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, unreachable } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { kBit } from '../../../../../util/constants.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; + +import { + kWGSizes, + kDataSentinel, + kPredicateCases, + runComputeTest, + SubgroupTest, + kFramebufferSizes, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +/** + * Generates scalar values for type + * + * Generates 4 32-bit values whose bit patterns represent + * interesting values of the data type. + * @param type The data type + */ +function generateScalarValues(type: Type): number[] { + const scalarTy = scalarTypeOf(type); + switch (scalarTy) { + case Type.u32: + return [kBit.u32.min, kBit.u32.max, 1111, 2222]; + case Type.i32: + return [ + kBit.i32.positive.min, + kBit.i32.positive.max, + kBit.i32.negative.min, + 0xffffffff, // -1 + ]; + case Type.f32: + return [ + kBit.f32.positive.zero, + kBit.f32.positive.nearest_max, + kBit.f32.negative.nearest_min, + 0xbf800000, // -1 + ]; + case Type.f16: + return [ + kBit.f16.positive.zero, + kBit.f16.positive.nearest_max, + kBit.f16.negative.nearest_min, + 0xbc00, // -1 + ]; + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return [0, 0, 0, 0]; +} + +/** + * Generates input bit patterns for the input type + * + * Generates 4 values of type in a Uint32Array. + * 16-bit types are appropriately packed. 
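+ * For example, four f16 scalars v0..v3 pack two per u32 as [v0 | (v1 << 16), v2 | (v3 << 16)],
+ * and each vec3<f16> input pads its fourth component with the sentinel value.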
+ * @param type The data type + */ +function generateTypedInputs(type: Type): Uint32Array { + const scalarValues = generateScalarValues(type); + let elements = 1; + if (type instanceof VectorType) { + elements = type.width; + } + if (type.requiresF16()) { + switch (elements) { + case 1: + return new Uint32Array([ + scalarValues[0] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[3] << 16), + ]); + case 2: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + case 3: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (kDataSentinel << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (kDataSentinel << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (kDataSentinel << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (kDataSentinel << 16), + ]); + case 4: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return new Uint32Array([0]); + } else { + const bound = elements === 3 ? 4 : elements; + const values: number[] = []; + for (let i = 0; i < 4; i++) { + for (let j = 0; j < bound; j++) { + if (j < elements) { + values.push(scalarValues[i]); + } else { + values.push(kDataSentinel); + } + } + } + return new Uint32Array(values); + } +} + +/** + * Checks results from data types test + * + * The output is expected to match the input values corresponding to the + * id being broadcast (assuming a linear mapping). + * @param metadata An unused parameter + * @param output The output data + * @param input The input data + * @param broadcast The id being broadcast + * @param type The data type being tested + */ +function checkDataTypes( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + broadcast: number, + type: Type +): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + const expectIdx = Math.floor(broadcast / 2); + const expectShift = broadcast % 2 === 1; + let expect = input[expectIdx]; + if (expectShift) { + expect >>= 16; + } + expect &= 0xffff; + + for (let i = 0; i < 4; i++) { + const index = Math.floor(i / 2); + const shift = i % 2 === 1; + let res = output[index]; + if (shift) { + res >>= 16; + } + res &= 0xffff; + if (res !== expect) { + return new Error(`${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 
4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const expect = input[broadcast * uints + j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${i * uints + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Test allowed data types') + .params(u => + u + .combine('type', keysOf(kTypes)) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = quadBroadcast(input[id], ${t.params.id}); +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, inputData, t.params.id, type); + } + ); + }); + +/** + * Checks quadBroadcast in compute shaders + * + * Assumes that quads are linear within a subgroup. 
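+ * That is, subgroup invocations 4k through 4k+3 are assumed to form quad k, so
+ * floor(id / 4) identifies the quad and id % 4 is the invocation's index within it.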
+ * + * @param metadata An array of integers divided as follows: + * * first half subgroup invocation ids + * * second half subgroup sizes + * @param output An array of integers divided as follows: + * * first half results of quad broadcast + * * second half generated unique subgroup ids + * @param broadcast The id being broadcast in the range [0, 3] + * @param filter A functor to filter active invocations + */ +function checkBroadcastCompute( + metadata: Uint32Array, + output: Uint32Array, + broadcast: number, + filter: (id: number, size: number) => boolean +): Error | undefined { + assert(broadcast === Math.trunc(broadcast)); + assert(broadcast >= 0 && broadcast <= 3); + + const bound = Math.floor(output.length / 2); + for (let i = 0; i < bound; i++) { + const subgroup_id = output[bound + i]; + const id = metadata[i]; + const size = metadata[bound + i]; + if (!filter(id, size)) { + if (output[i] !== kDataSentinel) { + return new Error(`Unexpected write for invocation ${i}`); + } + continue; + } + + const quad_id = Math.floor(id / 4); + const quad = [-1, -1, -1, -1]; + for (let j = 0; j < bound; j++) { + const other_id = metadata[j]; + const other_quad_id = Math.floor(other_id / 4); + const other_quad_index = other_id % 4; + const other_subgroup_id = output[bound + j]; + if (other_subgroup_id === subgroup_id && quad_id === other_quad_id) { + quad[other_quad_index] = j; + } + } + for (let j = 0; j < 4; j++) { + if (quad[j] === -1) { + return new Error(`Invocation ${i}: missing quad index ${j}`); + } + } + for (let j = 0; j < 4; j++) { + if (output[quad[j]] !== output[quad[broadcast]]) { + return new Error(`Incorrect result for quad: base invocation = ${ + quad[broadcast] + }, invocation = ${quad[j]} +- expected: ${output[quad[broadcast]]} +- got: ${output[quad[j]]}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc( + `Tests broadcast with all active invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. 
+ ` + ) + .params(u => + u + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + let b = quadBroadcast(lid, ${t.params.id}); + output.results[lid] = b; + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkBroadcastCompute(metadata, output, t.params.id, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc( + `Tests broadcast with predicated invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. +Quad operations require a fully active quad to operate correctly so several of the +predication filters are skipped. 
+ ` + ) + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .filter(t => { + return t.predicate === 'lower_half' || t.predicate === 'upper_half'; + }) + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + const testcase = kPredicateCases[t.params.predicate]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; + + if ${testcase.cond} { + let b = quadBroadcast(lid, ${t.params.id}); + output.results[lid] = b; + } +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkBroadcastCompute(metadata, output, t.params.id, testcase.filter); + } + ); + }); + +/** + * Checks results of quadBroadcast in fragment shaders. + * + * @param data The framebuffer output + * * component 0 is the broadcast of the integer x position + * * component 1 is the broadcast of the integer y position + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + * @param broadcast The quad id being broadcast + */ +function checkFragment( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number, + broadcast: number +): Error | undefined { + assert(broadcast === Math.trunc(broadcast)); + assert(broadcast >= 0 && broadcast <= 3); + + if (width < 3 || height < 3) { + return new Error( + `Insufficient framebuffer size [${width}w x ${height}h]. Minimum is [3w x 3h].` + ); + } + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const coordToIndex = (row: number, col: number) => { + return uintsPerRow * row + col * uintsPerTexel; + }; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const row_is_odd = row % 2 === 1; + const col_is_odd = col % 2 === 1; + + // Skip checking quads that extend into potential helper invocations. + const max_row = row_is_odd ? 
row : row + 1; + const max_col = col_is_odd ? col : col + 1; + if (max_row === height - 1 || max_col === width - 1) { + continue; + } + + let expect_row = row; + let expect_col = col; + switch (broadcast) { + case 0: + expect_row = row_is_odd ? row - 1 : row; + expect_col = col_is_odd ? col - 1 : col; + break; + case 1: + expect_row = row_is_odd ? row - 1 : row; + expect_col = col_is_odd ? col : col + 1; + break; + case 2: + expect_row = row_is_odd ? row : row + 1; + expect_col = col_is_odd ? col - 1 : col; + break; + case 3: + expect_row = row_is_odd ? row : row + 1; + expect_col = col_is_odd ? col : col + 1; + break; + } + + const row_broadcast = data[offset + 1]; + const col_broadcast = data[offset]; + if (expect_row !== row_broadcast) { + return new Error(`Row ${row}, col ${col}: incorrect row results: +- expected: ${expect_row} +- got: ${row_broadcast}`); + } + + if (expect_col !== col_broadcast) { + return new Error(`Row ${row}, col ${col}: incorrect col results: +- expected: ${expect_row} +- got: ${col_broadcast}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc(`Tests quadBroadcast in fragment shaders`) + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; // unused + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec4u { + // Force usage + _ = inputs[0]; + + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + + var x_broadcast = select(1001, u32(pos.x), in_range); + var y_broadcast = select(1001, u32(pos.y), in_range); + + x_broadcast = quadBroadcast(x_broadcast, ${t.params.id}); + y_broadcast = quadBroadcast(y_broadcast, ${t.params.id}); + + return vec4u(x_broadcast, y_broadcast, 0, 0); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + new Uint32Array([0]), // unused, + (data: Uint32Array) => { + return checkFragment( + data, + t.params.format, + t.params.size[0], + t.params.size[1], + t.params.id + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts new file mode 100644 index 000000000000..e6b6863a8e94 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts @@ -0,0 +1,666 @@ +export const description = ` +Execution tests for quadSwapX, quadSwapY, and quadSwapDiagnoal. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, unreachable } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { kBit } from '../../../../../util/constants.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; + +import { + kWGSizes, + kDataSentinel, + kPredicateCases, + runComputeTest, + SubgroupTest, + kFramebufferSizes, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +type SwapOp = 'quadSwapX' | 'quadSwapY' | 'quadSwapDiagonal'; + +const kOps: SwapOp[] = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal']; + +/** + * Generates scalar values for type + * + * Generates 4 32-bit values whose bit patterns represent + * interesting values of the data type. + * @param type The data type + */ +function generateScalarValues(type: Type): number[] { + const scalarTy = scalarTypeOf(type); + switch (scalarTy) { + case Type.u32: + return [kBit.u32.min, kBit.u32.max, 1111, 2222]; + case Type.i32: + return [ + kBit.i32.positive.min, + kBit.i32.positive.max, + kBit.i32.negative.min, + 0xffffffff, // -1 + ]; + case Type.f32: + return [ + kBit.f32.positive.zero, + kBit.f32.positive.nearest_max, + kBit.f32.negative.nearest_min, + 0xbf800000, // -1 + ]; + case Type.f16: + return [ + kBit.f16.positive.zero, + kBit.f16.positive.nearest_max, + kBit.f16.negative.nearest_min, + 0xbc00, // -1 + ]; + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return [0, 0, 0, 0]; +} + +/** + * Generates input bit patterns for the input type + * + * Generates 4 values of type in a Uint32Array. + * 16-bit types are appropriately packed. 
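+ * For instance, f16 scalars pack two values per u32, while vec3<f16> inputs pad the
+ * fourth component with the sentinel value.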
+ * @param type The data type + */ +function generateTypedInputs(type: Type): Uint32Array { + const scalarValues = generateScalarValues(type); + let elements = 1; + if (type instanceof VectorType) { + elements = type.width; + } + if (type.requiresF16()) { + switch (elements) { + case 1: + return new Uint32Array([ + scalarValues[0] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[3] << 16), + ]); + case 2: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + case 3: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (kDataSentinel << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (kDataSentinel << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (kDataSentinel << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (kDataSentinel << 16), + ]); + case 4: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return new Uint32Array([0]); + } else { + const bound = elements === 3 ? 4 : elements; + const values: number[] = []; + for (let i = 0; i < 4; i++) { + for (let j = 0; j < bound; j++) { + if (j < elements) { + values.push(scalarValues[i]); + } else { + values.push(kDataSentinel); + } + } + } + return new Uint32Array(values); + } +} + +/** + * Returns the swapped quad invocation id for the given op + * + * @param index The index in the range [0,3] + * @param op The swap + */ +function swapIndex(index: number, op: SwapOp): number { + assert(index === Math.trunc(index)); + assert(index >= 0 && index <= 3); + switch (op) { + case 'quadSwapX': + return index ^ 1; + case 'quadSwapY': + return index ^ 2; + case 'quadSwapDiagonal': + return index ^ 3; + } + unreachable(`Unhandled op ${op}`); +} + +/** + * Checks the results of data types test + * + * The outputs for a given index are expected to match the input values + * for the given swap. + * @param metadata An unused parameter + * @param output The output data + * @param input The input data + * @param op The type of swap + * @param type The data type + */ +function checkDataTypes( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + op: SwapOp, + type: Type +): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + for (let i = 0; i < 4; i++) { + const swapIdx = swapIndex(i, op); + + const expectIdx = Math.floor(swapIdx / 2); + const expectShift = swapIdx % 2 === 1; + let expect = input[expectIdx]; + if (expectShift) { + expect >>= 16; + } + expect &= 0xffff; + + const resIdx = Math.floor(i / 2); + const resShift = i % 2 === 1; + let res = output[resIdx]; + if (resShift) { + res >>= 16; + } + res &= 0xffff; + + if (res !== expect) { + return new Error(`${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 
4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const expect = input[swapIndex(i, op) * uints + j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${uints * i + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Test allowed data types') + .params(u => u.combine('type', keysOf(kTypes)).beginSubcases().combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = ${t.params.op}(input[id]); +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, inputData, t.params.op, type); + } + ); + }); + +/** + * Checks quad swaps in compute shaders + * + * Assumes that quads are linear within a subgroup. 
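+ * In other words, invocation id belongs to quad floor(id / 4) at quad index id % 4.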
+ * + * @param metadata An array of integers divided as follows: + * * first half subgroup invocation ids + * * second half subgroup sizes + * @param output An array of integers divided as follows: + * * first half results of quad broadcast + * * second half generated unique subgroup ids + * @param op The swap operation + * @param filter A functor to filter active invocations + */ +function checkSwapCompute( + metadata: Uint32Array, + output: Uint32Array, + op: SwapOp, + filter: (id: number, size: number) => boolean +): Error | undefined { + const bound = Math.floor(output.length / 2); + for (let i = 0; i < bound; i++) { + const subgroup_id = output[bound + i]; + const id = metadata[i]; + const size = metadata[bound + i]; + if (!filter(id, size)) { + if (output[i] !== kDataSentinel) { + return new Error(`Unexpected write for invocation ${i}`); + } + continue; + } + + const quad_id = Math.floor(id / 4); + const quad_index = id % 4; + let found = false; + for (let j = 0; j < bound; j++) { + const other_id = metadata[j]; + const other_quad_id = Math.floor(other_id / 4); + const other_quad_index = other_id % 4; + const other_subgroup_id = output[bound + j]; + if ( + subgroup_id === other_subgroup_id && + quad_id === other_quad_id && + quad_index === swapIndex(other_quad_index, op) + ) { + found = true; + if (output[i] !== j) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${j} +- got: ${output[i]}`); + } + break; + } + } + if (!found) { + return new Error(`Invocation ${i}: failed to find swapped result`); + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc( + `Tests swaps with all active invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. + ` + ) + .params(u => + u + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + let b = ${t.params.op}(lid); + output.results[lid] = b; + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkSwapCompute(metadata, output, t.params.op, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc( + `Tests swaps with all predicated invocations + +Quad operations require a full quad so workgroup sizes are limited to multiples of 4. 
+Quad operations require a fully active quad to operate correctly so several of the +predication filters are skipped. + ` + ) + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .filter(t => { + return t.predicate === 'lower_half' || t.predicate === 'upper_half'; + }) + .combine('wgSize', kWGSizes) + .filter(t => { + const wgThreads = t.wgSize[0] * t.wgSize[1] * t.wgSize[2]; + return wgThreads % 4 === 0; + }) + .beginSubcases() + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + const testcase = kPredicateCases[t.params.predicate]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : u32; // unused + +struct Output { + results : array, + subgroup_size : array, +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_size : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + // Force usage + _ = inputs; + + output.subgroup_size[lid] = subgroupBroadcastFirst(lid + 1); + metadata.id[lid] = id; + metadata.subgroup_size[lid] = subgroupSize; + + if ${testcase.cond} { + let b = ${t.params.op}(lid); + output.results[lid] = b; + } +}`; + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + new Uint32Array([0]), // unused + (metadata: Uint32Array, output: Uint32Array) => { + return checkSwapCompute(metadata, output, t.params.op, testcase.filter); + } + ); + }); + +/** + * Checks results of quad swaps in fragment shaders. + * + * @param data The framebuffer output + * * component 0 is the broadcast of the integer x position + * * component 1 is the broadcast of the integer y position + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + * @param broadcast The quad id being broadcast + */ +function checkFragment( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number, + op: SwapOp +): Error | undefined { + if (width < 3 || height < 3) { + return new Error( + `Insufficient framebuffer size [${width}w x ${height}h]. Minimum is [3w x 3h].` + ); + } + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const coordToIndex = (row: number, col: number) => { + return uintsPerRow * row + col * uintsPerTexel; + }; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const row_is_odd = row % 2 === 1; + const col_is_odd = col % 2 === 1; + + // Skip checking quads that extend into potential helper invocations. + const max_row = row_is_odd ? 
row : row + 1; + const max_col = col_is_odd ? col : col + 1; + if (max_row === height - 1 || max_col === width - 1) { + continue; + } + + let expect_row = row; + let expect_col = col; + switch (op) { + case 'quadSwapX': + expect_col = col_is_odd ? col - 1 : col + 1; + break; + case 'quadSwapY': + expect_row = row_is_odd ? row - 1 : row + 1; + break; + case 'quadSwapDiagonal': + expect_row = row_is_odd ? row - 1 : row + 1; + expect_col = col_is_odd ? col - 1 : col + 1; + break; + } + + const row_output = data[offset + 1]; + const col_output = data[offset]; + if (expect_row !== row_output) { + return new Error(`Row ${row}, col ${col}: incorrect row results: +- expected: ${expect_row} +- got: ${row_output}`); + } + + if (expect_col !== col_output) { + return new Error(`Row ${row}, col ${col}: incorrect col results: +- expected: ${expect_row} +- got: ${col_output}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc(`Tests quad swaps in fragment shaders`) + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('op', kOps) + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; // unused + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec4u { + // Force usage + _ = inputs[0]; + + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + + var x_swap = select(1001, u32(pos.x), in_range); + var y_swap = select(1001, u32(pos.y), in_range); + + x_swap = ${t.params.op}(x_swap); + y_swap = ${t.params.op}(y_swap); + + return vec4u(x_swap, y_swap, 0, 0); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + new Uint32Array([0]), // unused, + (data: Uint32Array) => { + return checkFragment( + data, + t.params.format, + t.params.size[0], + t.params.size[1], + t.params.op + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts index 42d8d09ff569..f65bb951bf25 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts @@ -7,11 +7,16 @@ T is S or vecN Returns the smooth Hermite interpolation between 0 and 1. Component-wise when T is a vector. For scalar T, the result is t * t * (3.0 - 2.0 * t), where t = clamp((x - low) / (high - low), 0.0, 1.0). + +If low >= high: +* It is a shader-creation error if low and high are const-expressions. +* It is a pipeline-creation error if low and high are override-expressions. 
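+
+The test cases below filter const-evaluated inputs to low < high to avoid these errors.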
`; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { GPUTest } from '../../../../../gpu_test.js'; -import { Type } from '../../../../../util/conversion.js'; +import { ScalarValue, Type, Value } from '../../../../../util/conversion.js'; +import { Case } from '../../case.js'; import { allInputSources, onlyConstInputSource, run } from '../../expression.js'; import { abstractFloatBuiltin, builtin } from './builtin.js'; @@ -19,6 +24,13 @@ import { d } from './smoothstep.cache.js'; export const g = makeTestGroup(GPUTest); +// Returns true if `c` is valid for a const evaluation of smoothstep. +function validForConst(c: Case): boolean { + const low = (c.input as Value[])[0] as ScalarValue; + const high = (c.input as Value[])[1] as ScalarValue; + return low.value < high.value; +} + g.test('abstract_float') .specURL('https://www.w3.org/TR/WGSL/#float-builtin-functions') .desc(`abstract float tests`) @@ -28,7 +40,7 @@ g.test('abstract_float') .combine('vectorize', [undefined, 2, 3, 4] as const) ) .fn(async t => { - const cases = await d.get('abstract_const'); + const cases = (await d.get('abstract_const')).filter(c => validForConst(c)); await run( t, abstractFloatBuiltin('smoothstep'), @@ -47,7 +59,15 @@ g.test('f32') ) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 'f32_const' : 'f32_non_const'); - await run(t, builtin('smoothstep'), [Type.f32, Type.f32, Type.f32], Type.f32, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f32, Type.f32, Type.f32], + Type.f32, + t.params, + validCases + ); }); g.test('f16') @@ -61,5 +81,13 @@ g.test('f16') }) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 'f16_const' : 'f16_non_const'); - await run(t, builtin('smoothstep'), [Type.f16, Type.f16, Type.f16], Type.f16, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f16, Type.f16, Type.f16], + Type.f16, + t.params, + validCases + ); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..04792b2d98c1 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,364 @@ +export const description = ` +Execution tests for subgroupAdd, subgroupExclusiveAdd, and subgroupInclusiveAdd + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 0; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point addition. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. +These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupAdd', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.additionInterval : FP.f32.additionInterval + ); + }); + +/** + * Checks subgroup additions + * + * Expected results: + * - subgroupAdd: each invocation should have result equal to real subgroup size + * - subgroupExclusiveAdd: each invocation should have result equal to its subgroup invocation id + * - subgroupInclusiveAdd: each invocation should be equal to the result of subgroupExclusiveAdd plus the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of additions + * @param type The data type + * @param operation Type of addition + * @param expectedfillValue The original value used to fill the test array + */ +function checkAddition( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = operation === 'subgroupAdd' ? 
0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = metadata[i + expectedOffset]; + if (operation === 'subgroupInclusiveAdd') { + expected += expectedfillValue; + } + + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup addition for valid data types + +Tests a simple addition of all 1 values. +Reductions expect result to be equal to actual subgroup size. +Exclusice scans expect result to be equal subgroup invocation id. + +TODO: support vec3 types. + ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + const expectedFillValue = 1; + let fillValue = expectedFillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(1, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(1, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkAddition(metadata, output, type, t.params.operation, expectedFillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated additions + * + * Assumes the shader performs a predicated subgroup addition with the + * subgroup_invocation_id as the data. + * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of addition + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedAddition( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 0; + if (filter(id, size)) { + const bound = + operation === 'subgroupInclusiveAdd' ? id + 1 : operation === 'subgroupAdd' ? size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected += j; + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. 
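+  // Summing countOneBits over the four ballot words counts the active invocations.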
+ let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}(id); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedAddition(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts new file mode 100644 index 000000000000..0aa461c4a578 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAll. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + kFramebufferSizes, + runComputeTest, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 1s except for a zero randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAll operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. 
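+ * Invocations excluded by the filter must not have written a result; their outputs
+ * are expected to still hold the sentinel value.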
+ * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAll + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAll( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 1; + v &= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAll`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. 
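+  // subgroupBroadcastFirst(lid) is uniform within a subgroup and, since
+  // local_invocation_index is unique per invocation, differs between subgroups.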
+ outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. + outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAll results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAll( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. 
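+  // Map each generated subgroup id to the AND of its inputs over the checked region.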
+ const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 1; + // First index of input is an atomic counter. + v &= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAll in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(1u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAll(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAll( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts new file mode 100644 index 000000000000..5d5b9de11420 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 0s except for a one randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAny operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. + * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAny + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAny( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 0; + v |= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 
0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAny`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. + outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. 
+ outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAny results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAny( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 0; + // First index of input is an atomic counter. + v |= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAny in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. 
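+  // Anything outside the checked interior (including helper invocations, if they
+  // participate) is given 0, the identity for OR, so it cannot change the
+  // subgroupAny result of its subgroup.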
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(0u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAny(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAny( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts new file mode 100644 index 000000000000..c50fd08a1570 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -0,0 +1,562 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { + kConcreteSignedIntegerScalarsAndVectors, + kConcreteUnsignedIntegerScalarsAndVectors, + scalarTypeOf, + Type, + VectorType, +} from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; +const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const; +const kTypes = objectsToRecord([ + ...kConcreteSignedIntegerScalarsAndVectors, + ...kConcreteUnsignedIntegerScalarsAndVectors, +]); + +/** + * Performs the appropriate bitwise operation on v1 and v2. + * + * @param op The subgroup operation + * @param v1 The first value + * @param v2 The second value + */ +function bitwise(op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', v1: number, v2: number): number { + switch (op) { + case 'subgroupAnd': + return v1 & v2; + case 'subgroupOr': + return v1 | v2; + case 'subgroupXor': + return v1 ^ v2; + } +} + +/** + * Returns the identity value for the subgroup operations + * + * @param op The subgroup operation + */ +function identity(op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor'): number { + switch (op) { + case 'subgroupAnd': + return ~0; + case 'subgroupOr': + case 'subgroupXor': + return 0; + } +} + +/** + * Checks the results for data type test + * + * The shader generate a unique subgroup id for each subgroup (avoiding 0). + * The check calculates the expected result for all subgroups and then compares that + * to the actual results. 
+ * @param metadata An array of integers divided as follows: + * * first half subgroup invocation id + * * second half unique subgroup id + * @param output An array of output values + * @param type The type being tested + * @param op The subgroup operation + * @param offset A constant offset added to subgroup invocation id to form the + * the input to the subgroup operation + */ +function checkDataTypes( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + offset: number +): undefined | Error { + const expected = new Map(); + for (let i = 0; i < Math.floor(metadata.length / 2); i++) { + const group_id = metadata[i + Math.floor(metadata.length / 2)]; + let expect = expected.get(group_id) ?? identity(op); + expect = bitwise(op, expect, i + offset); + expected.set(group_id, expect); + } + + let numEles = 1; + let stride = 1; + if (type instanceof VectorType) { + numEles = type.width; + stride = numEles === 3 ? 4 : numEles; + } + for (let inv = 0; inv < Math.floor(output.length / stride); inv++) { + const group_id = metadata[inv + Math.floor(metadata.length / 2)]; + const expect = expected.get(group_id) ?? 0; + for (let ele = 0; ele < numEles; ele++) { + const res = output[inv * stride + ele]; + if (res !== expect) { + return new Error(`Invocation ${inv}, component ${ele}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Tests allowed data types') + .params(u => + u + .combine('type', keysOf(kTypes)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const type = kTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width === 3 ? 4 : type.width; + } + + const scalarTy = scalarTypeOf(type); + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + id : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + + // Record subgroup invocation id for this invocation. + metadata.id[lid] = id; + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + outputs[lid] = ${t.params.op}(${type.toString()}(${scalarTy.toString()}(lid + inputs[0]))); +}`; + + const magicOffset = 0x7fff000f; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numEles, + new Uint32Array([magicOffset]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, type, t.params.op, magicOffset); + } + ); + }); + +/** + * Generates randomized input data + * + * Case 0: All 0s + * Case 1: All 0xffffs + * Case 2-9: All identity values except an inverted value randomly every 32 values. 
+ * All values capped to 0xffff + * Case 10+: Random values in the range [0, 2 ** 30] + * @param seed The PRNG seed + * @param num The number of values to generate + * @param identity The identity value for the operation + */ +function generateInputData(seed: number, num: number, identity: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 0xffff; + } else if (seed < 10) { + const bounded = x % bound; + let val = bounded === index ? ~identity : identity; + val &= 0xffff; + return val; + } + return prng.uniformInt(1 << 30); + }), + ]); +} + +/** + * Checks the result of compute tests + * + * Calculates the expected results for each subgroup and compares against + * the actual output. + * @param metadata An array divided as follows: + * * first half: subgroup invocation id in lower 16 bits + * subgroup size in upper 16 bits + * * second half: unique subgroup id + * @param output The outputs + * @param input The input data + * @param op The subgroup operation + * @param filter A predicate used to filter invocations. + */ +function checkBitwiseCompute( + metadata: Uint32Array, + output: Uint32Array, + input: Uint32Array, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + filter: (id: number, size: number) => boolean +): undefined | Error { + const expected = new Map(); + for (let i = 0; i < output.length; i++) { + const group_id = metadata[i + output.length]; + const combo = metadata[i]; + const id = combo & 0xffff; + const size = (combo >> 16) & 0xffff; + if (filter(id, size)) { + let expect = expected.get(group_id) ?? identity(op); + expect = bitwise(op, expect, input[i]); + expected.set(group_id, expect); + } + } + + for (let i = 0; i < output.length; i++) { + const group_id = metadata[i + output.length]; + const combo = metadata[i]; + const id = combo & 0xffff; + const size = (combo >> 16) & 0xffff; + const res = output[i]; + if (filter(id, size)) { + const expect = expected.get(group_id) ?? 0; + if (res !== expect) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${i}: unexpected write`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc('Test bitwise operations with randomized inputs') + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + id_and_size : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) sg_size : u32, +) { + + // Record both subgroup invocation id and subgroup size in the same u32. + // Subgroups sizes are in the range [4, 128] so both values fit. 
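+  // For example, id = 5 in a subgroup of size 64 is stored as (64 << 16) | 5 =
+  // 0x00400005; the checker unpacks it as combo & 0xffff and (combo >> 16) & 0xffff.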
+ metadata.id_and_size[lid] = id | (sg_size << 16); + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + outputs[lid] = ${t.params.op}(inputs[lid]); +}`; + + const inputData = generateInputData(t.params.case, wgThreads, identity(t.params.op)); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkBitwiseCompute( + metadata, + output, + inputData, + t.params.op, + (id: number, size: number) => { + return true; + } + ); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocations participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('op', kOps) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + id_and_size : array, + group_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + + // Record both subgroup invocation id and subgroup size in the same u32. + // Subgroups sizes are in the range [4, 128] so both values fit. + metadata.id_and_size[lid] = id | (subgroupSize << 16); + + // Record a unique id for this subgroup (avoid 0). + let group_id = subgroupBroadcastFirst(lid + 1); + metadata.group_id[lid] = group_id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.op}(inputs[lid]); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads, identity(t.params.op)); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkBitwiseCompute(metadata, output, inputData, t.params.op, testcase.filter); + } + ); + }); + +/** + * Checks bitwise ops results from a fragment shader. + * + * Avoids the last row and column to skip potential helper invocations. + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param op The subgroup operation + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkBitwiseFragment( + data: Uint32Array, + input: Uint32Array, + op: 'subgroupAnd' | 'subgroupOr' | 'subgroupXor', + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 
1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? identity(op); + v = bitwise(op, v, input[row * width + col]); + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests bitwise operations in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('op', kOps) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs, identity(t.params.op)); + + const ident = identity(t.params.op) === 0 ? '0' : '0xffffffff'; + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + let subgroup_id = subgroupBroadcastFirst(linear + 1); + + // Filter out possible helper invocations. 
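+  // Out-of-range (and possible helper) invocations receive the operation's
+  // identity (0 for OR/XOR, 0xffffffff for AND), so they cannot affect their
+  // subgroup's result.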
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(${ident}, inputs[linear], in_range); + + let res = ${t.params.op}(input); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkBitwiseFragment( + data, + inputData, + t.params.op, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index b2fa9e46ec7a..75fe27e8cb5d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32, t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect)); }); -g.test('dynamically_uniform_id').unimplemented(); - g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..d45c023cd17a --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,387 @@ +export const description = ` +Execution tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 1; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point multiplication. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. 
+These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupMul', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.multiplicationInterval : FP.f32.multiplicationInterval + ); + }); + +/** + * Checks subgroup multiplications. + * + * Expected results: + * - subgroupMul: each invocation should have result equal to 2 to the real subgroup size + * - subgroupExclusiveMul: each invocation should have result equal to 2 to its subgroup invocation id + * - subgroupInclusiveMul: each invocation should be equal to subgroupExclusiveMul result multiplied by the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of multiplications + * @param type The data type + * @param operation Type of multiplication + * @param expectedFillValue The original value used to fill the test array + */ +function checkMultiplication( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = operation === 'subgroupMul' ? 0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = Math.pow(2, metadata[i + expectedOffset]); + if (operation === 'subgroupInclusiveMul') { + expected *= expectedfillValue; + } + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup multiplication for valid data types + +Tests a simple multiplication of all 2 values. +Reductions expect result to be equal to actual subgroup size. +Exclusice scans expect result to be equal subgroup invocation id. + +TODO: support vec3 types. + ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + // Workgroup sizes are kept < 16 to avoid overflows. + // Other tests cover that the full subgroup will contribute. 
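+      // The largest size below is 9 invocations (e.g. [3, 3, 1]), so with a fill
+      // value of 2 the largest possible product is 2^9 = 512, which is exactly
+      // representable in every tested type, including f16.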
+ .combine('wgSize', [ + [4, 1, 1], + [8, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 1, 4], + [1, 1, 8], + [2, 2, 2], + [4, 2, 1], + [4, 1, 2], + [2, 4, 1], + [2, 1, 4], + [1, 4, 2], + [1, 2, 4], + [3, 3, 1], + [3, 1, 3], + [1, 3, 3], + ] as const) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + + const expectedfillValue = 2; + let fillValue = expectedfillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(fillValue, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(fillValue, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkMultiplication(metadata, output, type, t.params.operation, expectedfillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated multiplications + * + * Assumes the shader performs a predicated subgroup multiplication with the + * subgroup_invocation_id as the data. 
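+ * (Concretely, the shader passes (id % 4) + 1 as the data; valueModFun below
+ * mirrors that mapping when computing the expected product.)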
+ * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of multiplication + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedMultiplication( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 1; + if (filter(id, size)) { + // This function replicates the behavior in the shader. + const valueModFun = function (id: number) { + return (id % 4) + 1; + }; + const bound = + operation === 'subgroupInclusiveMul' ? id + 1 : operation === 'subgroupMul' ? size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected *= valueModFun(j); + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}((id % 4) + 1); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedMultiplication(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts new file mode 100644 index 000000000000..9438c265d7df --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -0,0 +1,555 @@ +import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; +import { FPInterval } from '../../../../../util/floating_point.js'; +import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +export class SubgroupTest extends TextureTestMixin(GPUTest) {} + +export const kNumCases = 1000; +export const kStride = 128; + +export const kWGSizes = [ + [4, 1, 1], + [8, 1, 1], + [16, 1, 1], + [32, 1, 1], + [64, 1, 1], + [128, 1, 1], + [256, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 16, 1], + [1, 32, 1], + [1, 64, 1], + [1, 128, 1], + [1, 256, 1], + [1, 1, 4], + [1, 1, 8], + [1, 1, 16], + [1, 1, 32], + [1, 1, 64], + [3, 3, 3], + [4, 4, 4], + [16, 16, 1], + [16, 1, 16], + [1, 16, 16], + [15, 3, 3], + [3, 15, 3], + [3, 3, 15], +] as const; + +export const kPredicateCases = { + every_even: { + cond: `id % 2 == 0`, + filter: (id: number, size: number) => { + return id % 2 === 0; + }, + }, + every_odd: { + cond: `id % 2 == 1`, + filter: (id: number, size: number) => { + return id % 2 === 1; + }, + }, + lower_half: { + cond: `id < subgroupSize / 2`, + filter: (id: number, size: number) => { + return id < Math.floor(size / 2); + }, + }, + upper_half: { + cond: `id >= subgroupSize / 2`, + filter: (id: number, size: number) => { + return id >= Math.floor(size / 2); + }, + }, + first_two: { + cond: `id == 0 || id == 1`, + filter: (id: number) => { + return id === 0 || id === 1; + }, + }, +}; + +/** + * Check the accuracy of the reduction operation. + * + * @param metadata An array containing subgroup ids for each invocation + * @param output An array containing the results of the reduction for each invocation + * @param indices An array of two values containing the indices of the interesting values in the input + * @param values An array of two values containing the interesting values in the input + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +function checkAccuracy( + metadata: Uint32Array, + output: Float32Array | Float16Array, + indices: number[], + values: number[], + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +): undefined | Error { + const subgroupIdIdx1 = metadata[indices[0]]; + const subgroupIdIdx2 = metadata[indices[1]]; + for (let i = 0; i < output.length; i++) { + const subgroupId = metadata[i]; + + const v1 = subgroupId === subgroupIdIdx1 ? 
values[0] : identity; + const v2 = subgroupId === subgroupIdIdx2 ? values[1] : identity; + const interval = intervalGen(v1, v2); + if (!interval.contains(output[i])) { + return new Error(`Invocation ${i}, subgroup id ${subgroupId}: incorrect result +- interval: ${interval.toString()} +- output: ${output[i]}`); + } + } + + return undefined; +} + +/** + * Run a floating-point accuracy subgroup test. + * + * @param t The base test + * @param seed A seed for the PRNG + * @param wgSize An array for the workgroup size + * @param operation The subgroup operation + * @param type The type (f16 or f32) + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +export async function runAccuracyTest( + t: GPUTest, + seed: number, + wgSize: number[], + operation: string, + type: 'f16' | 'f32', + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +) { + assert(seed < kNumCases); + const prng = new PRNG(seed); + + // Compatibility mode has lower workgroup limits. + const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + // Bias half the cases to lower indices since most subgroup sizes are <= 64. + let indexLimit = kStride; + if (seed < kNumCases / 4) { + indexLimit = 16; + } else if (seed < kNumCases / 2) { + indexLimit = 64; + } + + // Ensure two distinct indices are picked. + const idx1 = prng.uniformInt(indexLimit); + let idx2 = prng.uniformInt(indexLimit - 1); + if (idx1 === idx2) { + idx2++; + } + assert(idx2 < indexLimit); + + // Select two random values. + const range = type === 'f16' ? sparseScalarF16Range() : sparseScalarF32Range(); + const numVals = range.length; + const val1 = range[prng.uniformInt(numVals)]; + const val2 = range[prng.uniformInt(numVals)]; + + const extraEnables = type === 'f16' ? `enable f16;\nenable subgroups_f16;` : ``; + const wgsl = ` +enable subgroups; +${extraEnables} + +@group(0) @binding(0) +var inputs : array<${type}>; + +@group(0) @binding(1) +var outputs : array<${type}>; + +struct Metadata { + subgroup_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, +) { + metadata.subgroup_id[lid] = subgroupBroadcast(lid, 0); + outputs[lid] = ${operation}(inputs[lid]); +}`; + + const inputData = + type === 'f16' + ? 
new Float16Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]) + : new Float32Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputBuffer = t.makeBufferWithContents( + new Float32Array([...iterRange(kStride, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = kStride; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + entryPoint: 'main', + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + method: 'copy', + }); + const metadata = metadataReadback.data; + + let output: Float16Array | Float32Array; + if (type === 'f16') { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float16Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } else { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float32Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } + + t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen)); +} + +// Repeat the bit pattern evey 16 bits for use with 16-bit types. +export const kDataSentinel = 999 | (999 << 16); + +/** + * Runs compute shader subgroup test + * + * The test makes the following assumptions: + * * group(0) binding(0) is a storage buffer for input data + * * group(0) binding(1) is an output storage buffer for outputUintsPerElement * wgSize uints + * * group(0) binding(2) is an output storage buffer for 2 * wgSize uints + * + * @param t The base test + * @param wgsl The shader code + * @param outputUintsPerElement number of uints output per invocation + * @param inputData the input data + * @param checkFunction a functor that takes the output storage buffer data to check result validity + */ +export async function runComputeTest( + t: GPUTest, + wgsl: string, + wgSize: number[], + outputUintsPerElement: number, + inputData: Uint32Array, + checkFunction: (metadata: Uint32Array, output: Uint32Array) => Error | undefined +) { + // Compatibility mode has lower workgroup limits. 
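+  // Skip (rather than fail) workgroup sizes that exceed this device's limits.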
+ const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputUints = outputUintsPerElement * wgThreads; + const outputBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = 2 * wgThreads; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + method: 'copy', + }); + const metadata = metadataReadback.data; + + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: outputUints, + method: 'copy', + }); + const output = outputReadback.data; + + t.expectOK(checkFunction(metadata, output)); +} + +// Minimum size is [3, 3]. +export const kFramebufferSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 3], + [3, 111], + [35, 3], + [3, 35], + [53, 13], + [13, 53], + [3, 3], +] as const; + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen triangle. + * Tests should avoid checking the last row or column to avoid helper + * invocations. Underlying APIs do not consistently guarantee whether + * helper invocations participate in subgroup operations. 
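+ * Both framebuffer dimensions must be at least 3 (asserted below; see
+ * kFramebufferSizes), leaving at least a 2x2 interior to check after the last
+ * row and column are skipped.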
+ * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is input data + * @param width The framebuffer width + * @param height The framebuffer height + * @param inputData The input data + * @param checker A functor to check the framebuffer values + */ +export async function runFragmentTest( + t: SubgroupTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + inputData: Uint32Array | Float32Array | Float16Array, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-2, 4), vec2(-2, -4), vec2(2, 0), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + assert(width >= 3, 'Minimum width is 3'); + assert(height >= 3, 'Minimum height is 3'); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + // 256 minimum arises from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); + + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3); + pass.end(); + t.queue.submit([encoder.finish()]); + + const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(copyBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); +} diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts index 40b331efaba9..d2ba15adb969 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts @@ -1,6 +1,8 @@ export const description = ` Execution tests for the 'textureGather' builtin function +- TODO: Test un-encodable formats. 
+ A texture gather operation reads from a 2D, 2D array, cube, or cube array texture, computing a four-component vector as follows: * Find the four texels that would be used in a sampling operation with linear filtering, from mip level 0: - Use the specified coordinate, array index (when present), and offset (when present). @@ -23,11 +25,38 @@ A texture gather operation reads from a 2D, 2D array, cube, or cube array textur `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kCompressedTextureFormats, + kDepthStencilFormats, + kEncodableTextureFormats, +} from '../../../../../format_info.js'; + +import { + appendComponentTypeForFormatToTextureType, + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + isFillable, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -55,22 +84,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, samplePoints, stage, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
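The footprint selection described at the top of this file can be sketched in isolation (illustrative only; it ignores address modes, cube faces, levels other than 0, and the component ordering of the result, and assumes texel centers at integer + 0.5):

// The four texels that linear filtering would blend at mip level 0 for normalized (u, v).
function gatherFootprint(u: number, v: number, width: number, height: number): number[][] {
  const x0 = Math.floor(u * width - 0.5);
  const y0 = Math.floor(v * height - 0.5);
  // Texel coordinates before any address-mode wrapping or clamping is applied.
  return [
    [x0, y0],     [x0 + 1, y0],
    [x0, y0 + 1], [x0 + 1, y0 + 1],
  ];
}
console.log(gatherFootprint(0.5, 0.5, 8, 8)); // [[3,3],[4,3],[3,4],[4,4]]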
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + component: true, + hashInputs: [stage, format, C, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, component, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + component, + componentType: C === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 T: i32, u32, f32 fn textureGather(component: C, t: texture_cube, s: sampler, coords: vec3) -> vec4 @@ -85,15 +181,86 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, 
+ textureBuiltin: 'textureGather', + hashInputs: [stage, format, C, samplePoints, mode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube', format); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -122,17 +289,90 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, stage, samplePoints, C, A, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + component: true, + hashInputs: [stage, format, samplePoints, C, A, modeU, modeV, minFilter, offset], + }).map(({ coords, component, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -140,8 +380,9 @@ g.test('sampled_array_3d_coords') ` C: i32, u32 T: i32, u32, f32 +A: i32, u32 -fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * component: @@ -154,17 +395,90 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('C', ['i32', 'u32'] as const) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, C, A, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, C, samplePoints, mode, minFilter], + }).map(({ coords, component, arrayIndex }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube_array', format); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -185,13 +499,79 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -205,21 +585,90 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and 
depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C) -> vec4 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -234,23 +683,97 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
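+      // depth24plus, depth24plus-stencil8, and depth32float-stencil8 have an
+      // implementation-defined memory representation, so the CTS texel utilities cannot
+      // yet encode expected values for them; that is presumably why the
+      // isEncodableTextureFormat filter below excludes them.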
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.filt, t.params.format); + }) + .fn(async t => { + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('depth_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * t: The depth texture to read from @@ -259,12 +782,84 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
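+      // A cube-array texture is allocated with depthOrArrayLayers = 6 * (number of cubes),
+      // so the tests below derive the valid array_index range as depthOrArrayLayers / 6.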
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, arrayIndex }) => { + return { + builtin: 'textureGather', + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts index c743883ce849..f86a152c19bc 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts @@ -17,20 +17,42 @@ A texture gather compare operation performs a depth comparison on four texels in `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('array_2d_coords') 
.specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> vec4 -fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -46,24 +68,110 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { + format, + stage, + samplePoints, + A, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> vec4 Parameters: * t: The depth texture to read from @@ -73,17 +181,94 @@ Parameters: * depth_ref: The reference value to compare the sampled depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; -g.test('sampled_array_2d_coords') + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGatherCompare', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, depthRef, arrayIndex }) => { + return { + builtin: 'textureGatherCompare', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); + +g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -103,16 +288,85 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('C', ['i32', 'u32'] as const) + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, C, stage, samplePoints, mode, compare, filt: minFilter, offset } = t.params; + + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + offset, + depthRef: true, + hashInputs: [stage, format, C, samplePoints, mode, minFilter, compare, offset], + }).map(({ coords, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); -g.test('sampled_array_3d_coords') +g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -125,10 +379,82 @@ Parameters: * depth_ref: The reference value to compare the sampled depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - 
.combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + depthRef: true, + textureBuiltin: 'textureGatherCompare', + hashInputs: [stage, format, samplePoints, mode, minFilter, compare], + }).map(({ coords, depthRef }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts index 879817ec8ca3..689df4feb084 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts @@ -50,6 +50,7 @@ import { vec2, vec3, kSamplePointMethods, + kShortShaderStages, generateTextureBuiltinInputs1D, generateTextureBuiltinInputs2D, generateTextureBuiltinInputs3D, @@ -90,6 +91,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => textureDimensionAndFormatCompatible('1d', t.format)) // 1d textures can't have a height !== 1 @@ -105,7 +107,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, C, L, samplePoints } = t.params; + const { format, stage, C, L, samplePoints } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
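Several of the textureLoad tests below size their mip chains with maxMipLevelCount({ size }), which presumably computes the standard full-chain length from the largest dimension. A standalone sketch of that formula:

const fullMipLevelCount = (size: number[]): number =>
  1 + Math.floor(Math.log2(Math.max(...size)));
console.log(fullMipLevelCount([8, 8]));   // 4  (8x8 -> 4x4 -> 2x2 -> 1x1)
console.log(fullMipLevelCount([16, 8]));  // 5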
const [width] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -123,7 +125,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }, i) => { return { builtin: 'textureLoad', @@ -137,14 +139,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_1d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -166,9 +178,9 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) - // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter. - .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format)) + .filter(t => !isCompressedFloatTextureFormat(t.format)) .beginSubcases() .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) @@ -180,7 +192,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -188,10 +200,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - (canUseAsRenderTarget(format) ? 
GPUTextureUsage.RENDER_ATTACHMENT : 0), + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -199,7 +208,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -213,14 +222,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_2d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -241,6 +260,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => textureDimensionAndFormatCompatible('3d', t.format)) .beginSubcases() @@ -254,7 +274,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -272,7 +292,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -286,14 +306,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType('texture_3d', texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -316,6 +346,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('texture_type', [ 'texture_multisampled_2d', 'texture_depth_multisampled_2d', @@ -340,7 +371,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { texture_type, format, samplePoints, C, S } = t.params; + const { texture_type, format, stage, samplePoints, C, S } = t.params; const sampleCount = 4; const descriptor: GPUTextureDescriptor = { @@ -358,7 +389,7 @@ Parameters: method: samplePoints, descriptor, sampleIndex: { num: texture.sampleCount, type: S }, - hashInputs: [format, samplePoints, C, S], + hashInputs: [stage, format, samplePoints, C, S], }).map(({ coords, sampleIndex }) => { return { builtin: 'textureLoad', @@ -372,14 +403,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType(texture_type, texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await 
doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -400,6 +441,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) @@ -414,7 +456,7 @@ Parameters: t.skipIfTextureLoadNotSupportedForTextureType('texture_depth_2d'); }) .fn(async t => { - const { format, samplePoints, C, L } = t.params; + const { format, stage, samplePoints, C, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -422,10 +464,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -434,7 +473,7 @@ Parameters: method: samplePoints, descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, - hashInputs: [format, samplePoints, C, L], + hashInputs: [stage, format, samplePoints, C, L], }).map(({ coords, mipLevel }) => { return { builtin: 'textureLoad', @@ -447,14 +486,24 @@ Parameters: const textureType = 'texture_depth_2d'; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -472,14 +521,19 @@ Parameters: * coords: The 0-based texel coordinate. 
` ) - .paramsSubcasesOnly(u => + .params(u => u + .combine('stage', kShortShaderStages) + .beginSubcases() .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) .combine('L', ['i32', 'u32'] as const) ) + .beforeAllSubcases(t => + t.skipIf(typeof VideoFrame === 'undefined', 'VideoFrames are not supported') + ) .fn(async t => { - const { samplePoints, C, L } = t.params; + const { stage, samplePoints, C, L } = t.params; const size = [8, 8, 1]; @@ -490,6 +544,7 @@ Parameters: size, usage: GPUTextureUsage.COPY_DST, }; + const { texels, videoFrame } = createVideoFrameWithRandomDataAndGetTexels(descriptor.size); const texture = t.device.importExternalTexture({ source: videoFrame }); @@ -508,14 +563,23 @@ Parameters: const textureType = 'texture_external'; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage ); t.expectOK(res); videoFrame.close(); @@ -539,6 +603,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter. .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format)) @@ -562,7 +627,7 @@ Parameters: t.selectDeviceForTextureFormatOrSkipTestCase(t.params.format); }) .fn(async t => { - const { texture_type, format, samplePoints, C, A, L } = t.params; + const { texture_type, format, stage, samplePoints, C, A, L } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -583,7 +648,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, samplePoints, C, L, A], + hashInputs: [stage, format, samplePoints, C, L, A], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureLoad', @@ -598,14 +663,24 @@ Parameters: const textureType = appendComponentTypeForFormatToTextureType(texture_type, texture.format); const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -625,6 +700,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -639,7 +715,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. 
const [width] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -655,7 +731,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs1D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -666,14 +742,24 @@ Parameters: const textureType = `texture_storage_1d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -693,6 +779,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -707,7 +794,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 3, format }); @@ -721,7 +808,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -732,14 +819,24 @@ Parameters: const textureType = `texture_storage_2d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -761,6 +858,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -776,7 +874,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C, A } = t.params; + const { format, stage, samplePoints, C, A } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. 
const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -791,7 +889,7 @@ Parameters: method: samplePoints, descriptor, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, samplePoints, C, A], + hashInputs: [stage, format, samplePoints, C, A], }).map(({ coords, arrayIndex }) => { return { builtin: 'textureLoad', @@ -806,14 +904,24 @@ Parameters: dimension: '2d-array', }; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -833,6 +941,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combineWithParams([...TexelFormats, { format: 'bgra8unorm' }] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) @@ -847,7 +956,7 @@ Parameters: } }) .fn(async t => { - const { format, samplePoints, C } = t.params; + const { format, stage, samplePoints, C } = t.params; // We want at least 3 blocks or something wide enough for 3 mip levels. const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '3d' }); @@ -862,7 +971,7 @@ Parameters: const calls: TextureCall[] = generateTextureBuiltinInputs3D(50, { method: samplePoints, descriptor, - hashInputs: [format, samplePoints, C], + hashInputs: [stage, format, samplePoints, C], }).map(({ coords }) => { return { builtin: 'textureLoad', @@ -873,14 +982,24 @@ Parameters: const textureType = `texture_storage_3d<${format}, read>`; const viewDescriptor = {}; const sampler = undefined; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts index ca7ae3d0655c..500376321444 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts @@ -51,13 +51,13 @@ Parameters .params(u => u .combine('texture_type', ['texture_2d_array', 'texture_cube_array'] as const) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('sampled_type', ['f32', 'i32', 'u32'] as const) - .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 'compatibility mode does not support partial layer views' ); t.skipIf( @@ -110,12 +110,11 @@ Parameters .params(u => u .combine('texture_type', ['texture_depth_2d_array', 'texture_depth_cube_array'] as const) - .beginSubcases() .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 'compatibility mode does not support partial layer views' ); t.skipIf( @@ 
-184,14 +183,20 @@ Parameters .params(u => u .combineWithParams(TexelFormats) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('access_mode', ['read', 'write', 'read_write'] as const) .filter( t => t.access_mode !== 'read_write' || kTextureFormatInfo[t.format].color?.readWriteStorage ) - .combine('view_type', ['full', 'partial'] as const) ) - .beforeAllSubcases(t => t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format)) + .beforeAllSubcases(t => { + t.skipIf( + t.isCompatibility && t.params.view_type === 'partial', + 'compatibility mode does not support partial layer views' + ); + t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format); + }) .fn(t => { const { format, access_mode, view_type } = t.params; diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts index 5610701601cb..471a462504d4 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts @@ -88,6 +88,7 @@ Parameters const texture = t.createTextureTracked({ format, dimension, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, @@ -157,6 +158,7 @@ Parameters const texture = t.createTextureTracked({ format: 'depth32float', dimension, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts index e1aa3f67328c..b469cc7f01f4 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSample.spec.ts @@ -1,14 +1,20 @@ export const description = ` Samples a texture. +- TODO: test cube maps with more than 1 mip level. +- TODO: test un-encodable formats. 
+ note: uniformity validation is covered in src/webgpu/shader/validation/uniformity/uniformity.spec.ts `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { - isCompressedTextureFormat, + isDepthTextureFormat, + isEncodableTextureFormat, kCompressedTextureFormats, + kDepthStencilFormats, kEncodableTextureFormats, + textureDimensionAndFormatCompatible, } from '../../../../../format_info.js'; import { TextureTestMixin } from '../../../../../gpu_test.js'; @@ -16,10 +22,11 @@ import { vec2, vec3, TextureCall, - putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer, generateTextureBuiltinInputs2D, generateTextureBuiltinInputs3D, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, doTextureCalls, checkCallResults, createTextureWithRandomDataAndGetTexels, @@ -29,11 +36,13 @@ import { chooseTextureSize, isPotentiallyFilterableAndFillable, skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, - getDepthOrArrayLayersForViewDimension, getTextureTypeForTextureViewDimension, WGSLTextureSampleTest, + isSupportedViewFormatCombo, + vec1, + generateTextureBuiltinInputs1D, + skipIfNeedsFilteringAndIsUnfilterable, } from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; @@ -49,164 +58,171 @@ Parameters: * t The sampled, depth, or external texture to sample. * s The sampler type. * coords The texture coordinates used for sampling. -` - ) - .paramsSubcasesOnly(u => - u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(1)) - ) - .unimplemented(); - -g.test('sampled_2d_coords') - .specURL('https://www.w3.org/TR/WGSL/#texturesample') - .desc( - ` -fn textureSample(t: texture_2d, s: sampler, coords: vec2) -> vec4 -fn textureSample(t: texture_2d, s: sampler, coords: vec2, offset: vec2) -> vec4 - -Parameters: - * t The sampled, depth, or external texture to sample. - * s The sampler type. - * coords The texture coordinates used for sampling. - * offset - * The optional texel offset applied to the unnormalized texture coordinate before sampling the texture. - * This offset is applied before applying any texture wrapping modes. - * The offset expression must be a creation-time expression (e.g. vec2(1, 2)). - * Each offset component must be at least -8 and at most 7. - Values outside of this range will result in a shader-creation error. 
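// Note (illustrative sketch, not part of the patch): the kShortAddressModes /
// kShortAddressModeToAddressMode imports added above let the parameterizations below spell
// address modes with one-letter codes, which keeps the generated test-case names short.
// Assumed shape of that mapping (the real definition is in texture_utils.ts and may use
// different keys):
const kShortAddressModesSketch = ['c', 'r', 'm'] as const;
const kShortAddressModeToAddressModeSketch: Record<
  (typeof kShortAddressModesSketch)[number],
  GPUAddressMode
> = {
  c: 'clamp-to-edge',
  r: 'repeat',
  m: 'mirror-repeat',
};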
` ) .params(u => u .combine('format', kTestableColorFormats) + .filter(t => textureDimensionAndFormatCompatible('1d', t.format)) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('samplePoints', kSamplePointMethods) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) .beginSubcases() - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) + .combine('samplePoints', kSamplePointMethods) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, samplePoints, modeU, filt: minFilter } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const size = chooseTextureSize({ minSize: 8, minBlocks: 4, format, viewDimension: '1d' }); const descriptor: GPUTextureDescriptor = { format, - size: { width, height }, + dimension: '1d', + size, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], minFilter, magFilter: minFilter, }; - const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + const calls: TextureCall[] = generateTextureBuiltinInputs1D(50, { sampler, method: samplePoints, descriptor, - offset: true, - hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset], - }).map(({ coords, offset }) => { + derivatives: true, + hashInputs: [format, samplePoints, modeU, minFilter], + }).map(({ coords, derivativeMult }) => { return { builtin: 'textureSample', coordType: 'f', coords, - offset, + derivativeMult, }; }); const viewDescriptor = {}; + const textureType = 'texture_1d'; const results = await doTextureCalls( t, texture, viewDescriptor, - 'texture_2d', + textureType, sampler, - calls + calls, + 'f' ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, - 'texture_2d', + textureType, sampler, calls, - results + results, + 'f', + texture ); t.expectOK(res); }); -g.test('sampled_2d_coords,derivatives') +g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` fn textureSample(t: texture_2d, s: sampler, coords: vec2) -> vec4 fn textureSample(t: texture_2d, s: sampler, coords: vec2, offset: vec2) -> vec4 -test mip level selection based on derivatives - ` +Parameters: + * t The sampled, depth, or external texture to sample. + * s The sampler type. + * coords The texture coordinates used for sampling. + * offset + * The optional texel offset applied to the unnormalized texture coordinate before sampling the texture. + * This offset is applied before applying any texture wrapping modes. + * The offset expression must be a creation-time expression (e.g. vec2(1, 2)). + * Each offset component must be at least -8 and at most 7. + Values outside of this range will result in a shader-creation error. 
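// Note (illustrative sketch, not part of the patch): the inputs above are generated with
// `derivatives: true`, and each call now carries a `derivativeMult` that scales the implicit
// derivatives (and therefore the selected mip level) for that call; this subsumes the old
// `sampled_2d_coords,derivatives` test that is removed below. Rough shape of one call record,
// using only field names that appear in this patch (the real TextureCall type in
// texture_utils.ts has more optional members):
interface TextureCallSketch {
  builtin: 'textureSample';
  coordType: 'f';
  coords: number[]; // normalized texture coordinates
  derivativeMult?: number[]; // per-axis scale on the implicit derivatives
  offset?: number[]; // optional texel offset, each component in [-8, 7]
}
const exampleCall: TextureCallSketch = {
  builtin: 'textureSample',
  coordType: 'f',
  coords: [0.25, 0.75],
  derivativeMult: [2, 2], // log2(2) = 1, so this roughly targets mip level 1
  offset: [1, -1],
};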
+` ) .params(u => u .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('mipmapFilter', ['nearest', 'linear'] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() - // note: this is the derivative we want at sample time. It is not the value - // passed directly to the shader. This way if we change the texture size - // or render target size we can compute the correct values to achieve the - // same results. - .combineWithParams([ - { ddx: 0.5, ddy: 0.5 }, // test mag filter - { ddx: 1, ddy: 1 }, // test level 0 - { ddx: 2, ddy: 1 }, // test level 1 via ddx - { ddx: 1, ddy: 4 }, // test level 2 via ddy - { ddx: 1.5, ddy: 1.5 }, // test mix between 1 and 2 - { ddx: 6, ddy: 6 }, // test mix between 2 and 3 (there is no 3 so we should get just 2) - { ddx: 1.5, ddy: 1.5, offset: [7, -8] as const }, // test mix between 1 and 2 with offset - { ddx: 1.5, ddy: 1.5, offset: [3, -3] as const }, // test mix between 1 and 2 with offset - { ddx: 1.5, ddy: 1.5, uvwStart: [-3.5, -4] as const }, // test mix between 1 and 2 with negative coords - ]) - ) - .beforeAllSubcases(t => - skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) + .combine('samplePoints', kSamplePointMethods) ) + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + }) .fn(async t => { - const { format, mipmapFilter, ddx, ddy, uvwStart, offset } = t.params; + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. 
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); const descriptor: GPUTextureDescriptor = { format, - mipLevelCount: 3, size: { width, height }, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, }; - + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: 'repeat', - addressModeV: 'repeat', - minFilter: 'linear', - magFilter: 'linear', - mipmapFilter, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + derivatives: true, + offset: true, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + offset, + }; + }); const viewDescriptor = {}; - await putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer( + const textureType = 'texture_2d'; + const results = await doTextureCalls( t, - descriptor, + texture, viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, sampler, - { ddx, ddy, uvwStart, offset } + calls, + results, + 'f', + texture ); + t.expectOK(res); }); g.test('sampled_3d_coords') @@ -235,17 +251,17 @@ Parameters: u .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('viewDimension', ['3d', 'cube'] as const) - .filter(t => !isCompressedTextureFormat(t.format) || t.viewDimension === 'cube') - .combine('samplePoints', kCubeSamplePointMethods) - .filter(t => t.samplePoints !== 'cube-edges' || t.viewDimension !== '3d') - .beginSubcases() - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeW', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) .combine('offset', [false, true] as const) - .filter(t => t.viewDimension !== 'cube' || t.offset !== true) + .filter(t => t.dim !== 'cube' || t.offset !== true) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) @@ -253,85 +269,93 @@ Parameters: .fn(async t => { const { format, - viewDimension, + dim: viewDimension, samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, + modeU, + modeV, + modeW, + filt: minFilter, offset, } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); - const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); - + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, 
viewDimension }); const descriptor: GPUTextureDescriptor = { format, dimension: viewDimension === '3d' ? '3d' : '2d', ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), - size: { width, height, depthOrArrayLayers }, + size, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by just always setting this to 3. + mipLevelCount: viewDimension === '3d' ? 3 : 1, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, - addressModeW, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], minFilter, magFilter: minFilter, + mipmapFilter: minFilter, }; + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; const calls: TextureCall[] = ( viewDimension === '3d' ? generateTextureBuiltinInputs3D(50, { method: samplePoints as SamplePointMethods, sampler, descriptor, - hashInputs: [ - format, - viewDimension, - samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, - offset, - ], + derivatives: true, + hashInputs, }) : generateSamplePointsCube(50, { method: samplePoints, sampler, descriptor, - hashInputs: [ - format, - viewDimension, - samplePoints, - addressModeU, - addressModeV, - addressModeW, - minFilter, - ], + derivatives: true, + hashInputs, }) - ).map(({ coords, offset }) => { + ).map(({ coords, derivativeMult, offset }) => { return { builtin: 'textureSample', coordType: 'f', coords, + derivativeMult, offset, }; }); const viewDescriptor = { dimension: viewDimension, }; - const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + 'f', + texture ); t.expectOK(res); }); @@ -355,22 +379,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + derivatives: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + offset, + }; + }); + + const viewDescriptor = {}; + const textureType = 'texture_depth_2d'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: C) -> vec4 -fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> vec4 +fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: A) -> vec4 +fn textureSample(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> vec4 Parameters: * t The sampled, depth, or external texture to sample. @@ -385,24 +476,94 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureSample(t: texture_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * t The sampled, depth, or external texture to sample. @@ -411,16 +572,90 @@ Parameters: * array_index The 0-based texture array index to sample. ` ) - .paramsSubcasesOnly( - u => - u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') @@ -434,21 +669,106 @@ Parameters: * coords The texture coordinates used for sampling. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combineWithParams([ + { viewDimension: 'cube' }, + { viewDimension: 'cube-array', A: 'i32' }, + { viewDimension: 'cube-array', A: 'u32' }, + ] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported(t.params.viewDimension); + }) + .fn(async t => { + const { format, viewDimension, samplePoints, A, mode, filt: minFilter } = t.params; + + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. + mipLevelCount: 1, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: A ? 
{ num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [format, viewDimension, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A ? (A === 'i32' ? 'i' : 'u') : undefined, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = + viewDimension === 'cube' ? 'texture_depth_cube' : 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C) -> f32 -fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> f32 +fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A) -> f32 +fn textureSample(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> f32 Parameters: * t The sampled, depth, or external texture to sample. @@ -463,24 +783,92 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('L', ['i32', 'u32'] as const) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, mode, filt: minFilter, A, L, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
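// Note (illustrative sketch, not part of the patch): for cube-array views the tests above
// pass `arrayIndex: { num: texture.depthOrArrayLayers / 6, ... }` because a cube-array
// texture uses six 2D layers per cube, so the valid array_index range is [0, layers / 6 - 1]:
function cubeArrayIndexCountSketch(texture: { depthOrArrayLayers: number }): number {
  return texture.depthOrArrayLayers / 6; // e.g. 24 layers -> array indices 0..3
}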
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [format, samplePoints, mode, minFilter, L, A, offset], + }).map(({ coords, derivativeMult, arrayIndex, offset }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor: GPUTextureViewDescriptor = { dimension: '2d-array' }; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('depth_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesample') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSample(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: C) -> f32 +fn textureSample(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A) -> f32 Parameters: * t The sampled, depth, or external texture to sample. @@ -489,13 +877,90 @@ Parameters: * array_index The 0-based texture array index to sample. ` ) - .paramsSubcasesOnly( - u => - u - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: test derivatives with cubemaps by setting this to 3. 
+ mipLevelCount: 1, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + arrayIndex: A ? { num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [format, viewDimension, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex }) => { + return { + builtin: 'textureSample', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A ? (A === 'i32' ? 'i' : 'u') : undefined, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts index 452c3b4df710..b14297876995 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts @@ -13,6 +13,9 @@ import { doTextureCalls, generateTextureBuiltinInputs2D, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, TextureCall, vec2, WGSLTextureSampleTest, @@ -54,15 +57,22 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('textureType', ['texture_2d', 'texture_external'] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) + ) + .beforeAllSubcases(t => + t.skipIf( + t.params.textureType === 'texture_external' && typeof VideoFrame === 'undefined', + 'VideoFrames are not supported' + ) ) .fn(async t => { - const { textureType, samplePoints, addressModeU, addressModeV, minFilter } = t.params; + const { textureType, stage, samplePoints, modeU, modeV, filt: minFilter } = t.params; const descriptor: GPUTextureDescriptor = { format: 'rgba8unorm', @@ -79,8 +89,8 @@ Parameters: ); try { const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -90,7 +100,7 @@ Parameters: method: samplePoints, sampler, descriptor, - hashInputs: [samplePoints, addressModeU, addressModeV, minFilter], + hashInputs: [samplePoints, modeU, modeV, minFilter], }).map(({ coords }) => { return { builtin: 
'textureSampleBaseClampToEdge', @@ -99,14 +109,23 @@ Parameters: }; }); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage ); t.expectOK(res); } finally { diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts index 1c61c1a5f217..f49322f878d6 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBias.spec.ts @@ -2,14 +2,42 @@ export const description = ` Execution tests for the 'textureSampleBias' builtin function Samples a texture with a bias to the mip level. + +- TODO: test cube maps with more than one mip level. +- TODO: Test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompressedTextureFormats, kEncodableTextureFormats } from '../../../../../format_info.js'; +import { TextureTestMixin } from '../../../../../gpu_test.js'; + +import { + vec2, + vec3, + TextureCall, + generateTextureBuiltinInputs2D, + generateTextureBuiltinInputs3D, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + doTextureCalls, + checkCallResults, + createTextureWithRandomDataAndGetTexels, + generateSamplePointsCube, + kCubeSamplePointMethods, + SamplePointMethods, + chooseTextureSize, + isPotentiallyFilterableAndFillable, + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, + getTextureTypeForTextureViewDimension, + WGSLTextureSampleTest, + isSupportedViewFormatCombo, + skipIfNeedsFilteringAndIsUnfilterable, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(TextureTestMixin(WGSLTextureSampleTest)); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') @@ -31,14 +59,82 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + sampler, + method: samplePoints, + descriptor, + bias: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, offset, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + bias, + offset, + }; + }); + const viewDescriptor = {}; + const textureType = 'texture_2d'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') @@ -63,23 +159,126 @@ Parameters: ) .params(u => u - .combine('texture_type', ['texture_3d', 'texture_cube'] as const) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) .beginSubcases() - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(3)) + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { + format, + dim: viewDimension, + samplePoints, + modeU, + modeV, + modeW, + filt: minFilter, + offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const descriptor: GPUTextureDescriptor = { + format, + dimension: viewDimension === '3d' ? '3d' : '2d', + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. + mipLevelCount: viewDimension === '3d' ? 
3 : 1, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], + minFilter, + magFilter: minFilter, + }; + + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; + const calls: TextureCall[] = ( + viewDimension === '3d' + ? generateTextureBuiltinInputs3D(50, { + method: samplePoints as SamplePointMethods, + sampler, + descriptor, + bias: true, + offset, + hashInputs, + }) + : generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + bias: true, + hashInputs, + }) + ).map(({ coords, derivativeMult, offset, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + bias, + offset, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, bias: f32) -> vec4 -fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: C, bias: f32, offset: vec2) -> vec4 +fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, bias: f32) -> vec4 +fn textureSampleBias(t: texture_2d_array, s: sampler, coords: vec2, array_index: A, bias: f32, offset: vec2) -> vec4 Parameters: * t: The sampled texture to read from @@ -95,25 +294,95 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + bias: true, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, bias, offset }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + bias, + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplebias') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureSampleBias(t: texture_cube_array, s: sampler, coords: vec3, array_index: C, bias: f32) -> vec4 +fn textureSampleBias(t: texture_cube_array, s: sampler, coords: vec3, array_index: A, bias: f32) -> vec4 Parameters: * t: The sampled texture to read from @@ -129,13 +398,88 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('bias', [-16.1, -16, 0, 1, 15.99, 16] as const) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + bias: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, arrayIndex, bias }) => { + return { + builtin: 'textureSampleBias', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + bias, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts index eae5098257e6..27e55a8b189b 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompare.spec.ts @@ -1,13 +1,37 @@ export const description = ` Samples a depth texture and compares the sampled depth values against a reference value. + +- TODO: test cube maps with more than 1 mip level. +- TODO: test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') @@ -18,7 +42,7 @@ fn textureSampleCompare(t: texture_depth_2d, s: sampler_comparison, coords: vec2 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. * offset @@ -29,14 +53,86 @@ Parameters: Values outside of this range will result in a shader-creation error. 
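// Note (illustrative sketch, not part of the patch): the textureSampleCompare tests below
// combine every entry of kCompareFunctions. For a single texel, a comparison sample evaluates
// `depth_ref <op> storedDepth` and yields 0 or 1; with linear filtering the per-texel results
// are then filtered together. Reference sketch of the per-texel step:
function compareTexelSketch(op: GPUCompareFunction, depthRef: number, stored: number): number {
  switch (op) {
    case 'never':
      return 0;
    case 'less':
      return depthRef < stored ? 1 : 0;
    case 'equal':
      return depthRef === stored ? 1 : 0;
    case 'less-equal':
      return depthRef <= stored ? 1 : 0;
    case 'greater':
      return depthRef > stored ? 1 : 0;
    case 'not-equal':
      return depthRef !== stored ? 1 : 0;
    case 'greater-equal':
      return depthRef >= stored ? 1 : 0;
    default:
      return 1; // 'always'
  }
}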
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, modeU, modeV, filt: minFilter, compare, offset } = t.params; + + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompare', + sampler, + descriptor, + derivatives: true, + depthRef: true, + offset, + hashInputs: [format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') @@ -46,31 +142,106 @@ fn textureSampleCompare(t: texture_depth_cube, s: sampler_comparison, coords: ve Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 once derivatives with cube maps are supported + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + depthRef: true, + textureBuiltin: 'textureSampleCompare', + hashInputs: [format, samplePoints, mode, minFilter, compare], + }).map(({ coords, derivativeMult, depthRef }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> f32 -fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> f32 +fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> f32 +fn textureSampleCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. @@ -82,41 +253,197 @@ Parameters: Values outside of this range will result in a shader-creation error. 
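// Note (illustrative sketch, not part of the patch): each of these comparison tests fills its
// depth texture via makeRandomDepthComparisonTexelGenerator(descriptor, compare); the intent,
// assumed from the name and usage, is that the chosen compare function sees both passing and
// failing texels. A minimal stand-in with the same intent:
function randomDepthTexelSketch(rand: () => number): number {
  // Quantize to a few levels so 'equal' / 'not-equal' also hit both outcomes.
  const kLevels = 8;
  return Math.floor(rand() * kLevels) / (kLevels - 1);
}
const exampleDepthTexel = randomDepthTexelSketch(Math.random);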
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { format, samplePoints, A, modeU, modeV, filt: minFilter, compare, offset } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompare', + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompare', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecompare') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> f32 +fn textureSampleCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. 
* array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 once derivatives with cube maps are supported + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + textureBuiltin: 'textureSampleCompare', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [format, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, depthRef, arrayIndex }) => { + return { + builtin: 'textureSampleCompare', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + 'f' + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + 'f', + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts index 500df8a6ecaa..61d093a638cd 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleCompareLevel.spec.ts @@ -7,34 +7,38 @@ The textureSampleCompareLevel function is the same as textureSampleCompare, exce * The function does not compute derivatives. * There is no requirement for textureSampleCompareLevel to be invoked in uniform control flow. * textureSampleCompareLevel may be invoked in any shader stage. + +- TODO: test un-encodable formats. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; - -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -export const g = makeTestGroup(GPUTest); +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + makeRandomDepthComparisonTexelGenerator, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -g.test('stage') - .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') - .desc( - ` -Tests that 'textureSampleCompareLevel' maybe called in any shader stage. -` - ) - .params(u => u.combine('stage', ['fragment', 'vertex', 'compute'] as const)) - .unimplemented(); - -g.test('control_flow') - .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') - .desc( - ` -Tests that 'textureSampleCompareLevel' maybe called in non-uniform control flow. -` - ) - .params(u => u.combine('stage', ['fragment', 'vertex', 'compute'] as const)) - .unimplemented(); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') @@ -45,7 +49,7 @@ fn textureSampleCompareLevel(t: texture_depth_2d, s: sampler_comparison, coords: Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. * offset @@ -56,14 +60,96 @@ Parameters: Values outside of this range will result in a shader-creation error. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { + format, + stage, + samplePoints, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompareLevel', + sampler, + descriptor, + derivatives: true, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') @@ -73,31 +159,107 @@ fn textureSampleCompareLevel(t: texture_depth_cube, s: sampler_comparison, coord Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * depth_ref The reference value to compare the sampled depth value against. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: change to 3 + mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + depthRef: true, + textureBuiltin: 'textureSampleCompareLevel', + hashInputs: [stage, format, samplePoints, mode, minFilter, compare], + }).map(({ coords, derivativeMult, depthRef }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('arrayed_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> f32 -fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> f32 +fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> f32 +fn textureSampleCompareLevel(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. 
* array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. @@ -109,41 +271,207 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { + format, + stage, + samplePoints, + A, + modeU, + modeV, + filt: minFilter, + compare, + offset, + } = t.params; + + const viewDimension = '2d-array'; + const size = chooseTextureSize({ minSize: 16, minBlocks: 4, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureSampleCompareLevel', + sampler, + descriptor, + derivatives: true, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, derivativeMult, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureSampleCompareLevel', + coordType: 'f', + coords, + derivativeMult, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('arrayed_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplecomparelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 -fn textureSampleCompareLevel(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> f32 +fn textureSampleCompareLevel(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> f32 Parameters: * t The depth texture to sample. - * s The sampler_comparision type. + * s The sampler_comparison type. * coords The texture coordinates used for sampling. * array_index: The 0-based texture array index to sample. * depth_ref The reference value to compare the sampled depth value against. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('stage', kShortShaderStages) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
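Aside on the cube-array cases in this hunk: a cube-array view consumes 6 array layers per cube, which is why these tests pass `arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }`, and the `skipIfTextureViewDimensionNotSupported('cube-array')` guard reflects that some configurations (e.g. compatibility mode) may not expose cube-array views. Illustrative helper only:

// Number of addressable cubes in a cube-array texture.
// Assumes the layer count is a multiple of 6, as required for cube-array views.
function cubeArrayCubeCount(depthOrArrayLayers: number): number {
  return depthOrArrayLayers / 6;
}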
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, stage, samplePoints, mode, filt: minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor, { + generator: makeRandomDepthComparisonTexelGenerator(descriptor, compare), + }); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + derivatives: true, + textureBuiltin: 'textureSampleCompareLevel', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [stage, format, samplePoints, mode, minFilter], + }).map(({ coords, derivativeMult, depthRef, arrayIndex }) => { + return { + builtin: 'textureSampleCompareLevel', + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + derivativeMult, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts index e0d754ece391..8da6ffdfe9c1 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleGrad.spec.ts @@ -1,13 +1,42 @@ export const description = ` Samples a texture using explicit gradients. + +- TODO: test cube maps with more than one mip level. +- TODO: Test un-encodable formats. 
`; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompressedTextureFormats, kEncodableTextureFormats } from '../../../../../format_info.js'; + +import { + appendComponentTypeForFormatToTextureType, + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + generateTextureBuiltinInputs3D, + getTextureTypeForTextureViewDimension, + isPotentiallyFilterableAndFillable, + isSupportedViewFormatCombo, + kCubeSamplePointMethods, + kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, + SamplePointMethods, + skipIfNeedsFilteringAndIsUnfilterable, + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -30,13 +59,82 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) ) - .unimplemented(); + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) + ) + .fn(async t => { + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
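Aside, expanding on the sizing comment above: a full mip chain has 1 + floor(log2(max(width, height))) levels, so a minimum extent of 8 texels (8 → 4 → 2) is enough for the `mipLevelCount: 3` these tests request, and `minBlocks: 4` presumably keeps compressed formats several blocks wide at the base level. Sketch only, not the chooseTextureSize implementation:

function fullMipLevelCount(width: number, height: number): number {
  return 1 + Math.floor(Math.log2(Math.max(width, height)));
}
// e.g. fullMipLevelCount(8, 8) === 4 >= 3, so a 3-level chain always fits.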
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + offset, + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], + }).map(({ coords, offset, ddx, ddy }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -60,13 +158,121 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('offset', generateOffsets(3)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('modeW', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { + format, + dim: viewDimension, + stage, + samplePoints, + modeU, + modeV, + modeW, + filt: minFilter, + offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const descriptor: GPUTextureDescriptor = { + format, + dimension: viewDimension === '3d' ? '3d' : '2d', + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. + mipLevelCount: viewDimension === '3d' ? 
3 : 1, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + addressModeW: kShortAddressModeToAddressMode[modeW], + minFilter, + magFilter: minFilter, + }; + + const hashInputs = [ + format, + viewDimension, + samplePoints, + modeU, + modeV, + modeW, + minFilter, + offset, + ]; + const calls: TextureCall[] = ( + viewDimension === '3d' + ? generateTextureBuiltinInputs3D(50, { + method: samplePoints as SamplePointMethods, + sampler, + descriptor, + grad: true, + offset, + hashInputs, + }) + : generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + hashInputs, + }) + ).map(({ coords, offset, ddx, ddy }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + offset, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension)!; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -92,16 +298,88 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + ) + .beforeAllSubcases(t => + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) - .unimplemented(); + .fn(async t => { + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
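Aside: textureSampleGrad selects its mip level from the explicit gradients rather than from implicit derivatives. A rough sketch of the usual mapping is below (scale the gradients from normalized coordinates to texels, take the larger footprint, use its log2 as the LOD; clamping to the available mip range omitted). Implementations are permitted some tolerance here, which is why a software checker typically accepts a range of results rather than a single value. Names are illustrative, not CTS code.

function approximateLodFromGradients(
  ddx: readonly [number, number],
  ddy: readonly [number, number],
  textureSize: readonly [number, number]
): number {
  // Footprint of each gradient in texel space.
  const dx = Math.hypot(ddx[0] * textureSize[0], ddx[1] * textureSize[1]);
  const dy = Math.hypot(ddy[0] * textureSize[0], ddy[1] * textureSize[1]);
  return Math.log2(Math.max(dx, dy));
}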
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + grad: true, + offset, + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], + }).map(({ coords, ddx, ddy, arrayIndex, offset }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = 'texture_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplegrad') @@ -126,11 +404,89 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) + .combine('stage', kShortShaderStages) + .combine('format', kTestableColorFormats) + .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, stage, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ + minSize: 32, + minBlocks: 4, + format, + viewDimension, + }); + const descriptor: GPUTextureDescriptor = { + format, + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + // MAINTENANCE_TODO: use 3 for cube maps when derivatives are supported for cube maps. 
+ mipLevelCount: 1, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + grad: true, + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, viewDimension, A, samplePoints, mode, minFilter], + }).map(({ coords, ddx, ddy, arrayIndex }) => { + return { + builtin: 'textureSampleGrad', + coordType: 'f', + coords, + ddx, + ddy, + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = getTextureTypeForTextureViewDimension(viewDimension); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results, + stage, + texture + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts index 729563553260..840bafcab223 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts @@ -1,24 +1,11 @@ export const description = ` Samples a texture. -Must only be used in a fragment shader stage. -Must only be invoked in uniform control flow. - - TODO: Test un-encodable formats. -- TODO: set mipLevelCount to 3 for cubemaps. See MAINTENANCE_TODO below - - The issue is sampling a corner of a cubemap is undefined. We try to quantize coordinates - so we never get a corner but when sampling smaller mip levels that's more difficult unless we make the textures - larger. Larger is slower. - - Solution 1: Fix the quantization - Solution 2: special case checking cube corners. Expect some value between the color of the 3 corner texels. 
- `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { - isCompressedTextureFormat, isDepthTextureFormat, isEncodableTextureFormat, kCompressedTextureFormats, @@ -38,9 +25,14 @@ import { getDepthOrArrayLayersForViewDimension, getTextureTypeForTextureViewDimension, isPotentiallyFilterableAndFillable, + isSupportedViewFormatCombo, kCubeSamplePointMethods, kSamplePointMethods, + kShortAddressModes, + kShortAddressModeToAddressMode, + kShortShaderStages, SamplePointMethods, + skipIfNeedsFilteringAndIsUnfilterable, skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable, TextureCall, vec2, @@ -78,20 +70,22 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, stage, samplePoints, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -103,8 +97,8 @@ Parameters: }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -116,7 +110,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, offset, - hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset], + hashInputs: [stage, format, samplePoints, modeU, modeV, minFilter, offset], }).map(({ coords, mipLevel, offset }) => { return { builtin: 'textureSampleLevel', @@ -129,14 +123,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -170,21 +174,23 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('modeU', kShortAddressModes) + .combine('modeV', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) 
.combine('A', ['i32', 'u32'] as const) - .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, A, addressModeU, addressModeV, minFilter, offset } = t.params; + const { format, stage, samplePoints, A, modeU, modeV, filt: minFilter, offset } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); // We want at least 4 blocks or something wide enough for 3 mip levels. const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -198,8 +204,8 @@ Parameters: }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU, - addressModeV, + addressModeU: kShortAddressModeToAddressMode[modeU], + addressModeV: kShortAddressModeToAddressMode[modeV], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -212,7 +218,7 @@ Parameters: mipLevel: { num: texture.mipLevelCount, type: 'f32' }, arrayIndex: { num: texture.depthOrArrayLayers, type: A }, offset, - hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset], + hashInputs: [stage, format, samplePoints, A, modeU, modeV, minFilter, offset], }).map(({ coords, mipLevel, arrayIndex, offset }) => { return { builtin: 'textureSampleLevel', @@ -227,14 +233,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -266,25 +282,35 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) - .combine('viewDimension', ['3d', 'cube'] as const) - .filter(t => !isCompressedTextureFormat(t.format) || t.viewDimension === 'cube') + .combine('dim', ['3d', 'cube'] as const) + .filter(t => isSupportedViewFormatCombo(t.format, t.dim)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) + .filter(t => t.dim !== 'cube' || t.offset !== true) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) - .filter(t => t.samplePoints !== 'cube-edges' || t.viewDimension !== '3d') - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) - .combine('offset', [false, true] as const) - .filter(t => t.viewDimension !== 'cube' || t.offset !== true) + .filter(t => t.samplePoints !== 'cube-edges' || t.dim !== '3d') ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, viewDimension, samplePoints, addressMode, minFilter, offset } = t.params; + const { + format, + dim: viewDimension, + stage, + samplePoints, + mode, + filt: minFilter, + 
offset, + } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const [width, height] = chooseTextureSize({ minSize: 32, minBlocks: 2, format, viewDimension }); const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); const descriptor: GPUTextureDescriptor = { @@ -293,19 +319,19 @@ Parameters: ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), size: { width, height, depthOrArrayLayers }, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: make mipLevelCount always 3 - mipLevelCount: viewDimension === 'cube' ? 1 : 3, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, }; + const hashInputs = [stage, format, viewDimension, samplePoints, mode, minFilter, offset]; const calls: TextureCall[] = ( viewDimension === '3d' ? generateTextureBuiltinInputs3D(50, { @@ -314,14 +340,14 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, offset, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter, offset], + hashInputs, }) : generateSamplePointsCube(50, { method: samplePoints, sampler, descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter, offset], + hashInputs, }) ).map(({ coords, mipLevel, offset }) => { return { @@ -337,14 +363,24 @@ Parameters: dimension: viewDimension, }; const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -379,24 +415,26 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kTestableColorFormats) .filter(t => isPotentiallyFilterableAndFillable(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) .combine('A', ['i32', 'u32'] as const) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) ) .beforeAllSubcases(t => { skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); t.skipIfTextureViewDimensionNotSupported('cube-array'); }) .fn(async t => { - const { format, samplePoints, A, addressMode, minFilter } = t.params; + const { format, stage, samplePoints, A, mode, filt: minFilter } = t.params; + skipIfNeedsFilteringAndIsUnfilterable(t, minFilter, format); const viewDimension: GPUTextureViewDimension = 'cube-array'; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -405,14 +443,13 @@ Parameters: format, size, usage: 
GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: Set this to 3. See above. - mipLevelCount: 1, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -423,8 +460,8 @@ Parameters: sampler, descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, - arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [stage, format, viewDimension, A, samplePoints, mode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureSampleLevel', @@ -440,14 +477,24 @@ Parameters: dimension: viewDimension, }; const textureType = getTextureTypeForTextureViewDimension(viewDimension); - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -456,7 +503,7 @@ g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L, offset: vec2) -> f32 @@ -480,23 +527,24 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) .combine('L', ['i32', 'u32'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressMode, minFilter, L, offset } = t.params; + const { format, stage, samplePoints, mode, filt: minFilter, L, offset } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
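Aside on the abbreviated parameter names used throughout these hunks: 'filt', 'modeU'/'modeV'/'mode' ('c' | 'r' | 'm') and 'stage' ('c' | 'f' | 'v') keep the generated case names short (see kShortAddressModeToAddressMode and kShortShaderStages in texture_utils.ts further down). An illustrative expansion into a sampler descriptor, assuming the same one-letter keys; the helper itself is hypothetical:

const kShortModeToAddressMode = {
  c: 'clamp-to-edge',
  r: 'repeat',
  m: 'mirror-repeat',
} as const;

// Builds the sampler used by most of these tests: one filter shared by min/mag/mipmap.
function samplerFromShortParams(
  modeU: keyof typeof kShortModeToAddressMode,
  modeV: keyof typeof kShortModeToAddressMode,
  filt: GPUFilterMode
): GPUSamplerDescriptor {
  return {
    addressModeU: kShortModeToAddressMode[modeU],
    addressModeV: kShortModeToAddressMode[modeV],
    minFilter: filt,
    magFilter: filt,
    mipmapFilter: filt,
  };
}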
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -504,15 +552,12 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -524,7 +569,7 @@ Parameters: descriptor, mipLevel: { num: texture.mipLevelCount, type: L }, offset, - hashInputs: [format, samplePoints, addressMode, minFilter, L, offset], + hashInputs: [stage, format, samplePoints, mode, minFilter, L, offset], }).map(({ coords, mipLevel, offset }) => { return { builtin: 'textureSampleLevel', @@ -537,14 +582,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_depth_2d', format); const viewDescriptor = {}; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -553,7 +608,8 @@ g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L, offset: vec2) -> f32 @@ -578,24 +634,25 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. .filter(t => isEncodableTextureFormat(t.format)) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) + .combine('offset', [false, true] as const) .beginSubcases() .combine('samplePoints', kSamplePointMethods) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) .combine('A', ['i32', 'u32'] as const) .combine('L', ['i32', 'u32'] as const) - .combine('offset', [false, true] as const) ) .beforeAllSubcases(t => skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format) ) .fn(async t => { - const { format, samplePoints, addressMode, minFilter, A, L, offset } = t.params; + const { format, stage, samplePoints, mode, filt: minFilter, A, L, offset } = t.params; // We want at least 4 blocks or something wide enough for 3 mip levels. 
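Aside: the f32-level variants of textureSampleLevel above blend between adjacent mips when mipmapFilter is 'linear', while the integer-level variants in this hunk address a single level. A simplified sketch of the blend (clamping details elided; not the CTS reference implementation):

function blendMipLevels(
  level: number,
  mipLevelCount: number,
  sampleLevel: (lvl: number) => number
): number {
  const clamped = Math.min(Math.max(level, 0), mipLevelCount - 1);
  const lo = Math.floor(clamped);
  const hi = Math.min(lo + 1, mipLevelCount - 1);
  const t = clamped - lo;
  // Linear mix of the two nearest levels; with 'nearest' only the closer level would be used.
  return sampleLevel(lo) * (1 - t) + sampleLevel(hi) * t;
}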
const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); @@ -603,16 +660,13 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -625,7 +679,7 @@ Parameters: arrayIndex: { num: texture.depthOrArrayLayers, type: A }, mipLevel: { num: texture.mipLevelCount, type: L }, offset, - hashInputs: [format, samplePoints, addressMode, minFilter, L, A, offset], + hashInputs: [stage, format, samplePoints, mode, minFilter, L, A, offset], }).map(({ coords, mipLevel, arrayIndex, offset }) => { return { builtin: 'textureSampleLevel', @@ -640,14 +694,24 @@ Parameters: }); const textureType = appendComponentTypeForFormatToTextureType('texture_depth_2d_array', format); const viewDescriptor: GPUTextureViewDescriptor = { dimension: '2d-array' }; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, { texels, descriptor, viewDescriptor }, textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); @@ -656,7 +720,8 @@ g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 +A is i32 or u32 fn textureSampleLevel(t: texture_depth_cube, s: sampler, coords: vec3, level: L) -> f32 fn textureSampleLevel(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A, level: L) -> f32 @@ -680,6 +745,7 @@ Parameters: ) .params(u => u + .combine('stage', kShortShaderStages) .combine('format', kDepthStencilFormats) // filter out stencil only formats .filter(t => isDepthTextureFormat(t.format)) @@ -690,21 +756,21 @@ Parameters: { viewDimension: 'cube-array', A: 'i32' }, { viewDimension: 'cube-array', A: 'u32' }, ] as const) + .combine('filt', ['nearest', 'linear'] as const) + .combine('mode', kShortAddressModes) .beginSubcases() .combine('samplePoints', kCubeSamplePointMethods) .combine('L', ['i32', 'u32'] as const) - .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) - .combine('minFilter', ['nearest', 'linear'] as const) ) .beforeAllSubcases(t => { skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(t, t.params.format); t.skipIfTextureViewDimensionNotSupported(t.params.viewDimension); }) .fn(async t => { - const { format, viewDimension, samplePoints, A, L, addressMode, minFilter } = t.params; + const { format, stage, viewDimension, samplePoints, A, L, mode, filt: minFilter } = t.params; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -712,18 +778,15 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | 
GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: 3, ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { - addressModeU: addressMode, - addressModeV: addressMode, - addressModeW: addressMode, + addressModeU: kShortAddressModeToAddressMode[mode], + addressModeV: kShortAddressModeToAddressMode[mode], + addressModeW: kShortAddressModeToAddressMode[mode], minFilter, magFilter: minFilter, mipmapFilter: minFilter, @@ -733,9 +796,9 @@ Parameters: method: samplePoints, sampler, descriptor, - mipLevel: { num: texture.mipLevelCount, type: L }, - arrayIndex: A ? { num: texture.depthOrArrayLayers, type: A } : undefined, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], + mipLevel: { num: texture.mipLevelCount - 1, type: L }, + arrayIndex: A ? { num: texture.depthOrArrayLayers / 6, type: A } : undefined, + hashInputs: [stage, format, viewDimension, samplePoints, mode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureSampleLevel', @@ -752,7 +815,15 @@ Parameters: }; const textureType = viewDimension === 'cube' ? 'texture_depth_cube' : 'texture_depth_cube_array'; - const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const results = await doTextureCalls( + t, + texture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); const res = await checkCallResults( t, @@ -760,7 +831,9 @@ Parameters: textureType, sampler, calls, - results + results, + stage, + texture ); t.expectOK(res); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts index 09b48b13ce63..e955b82ed603 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureStore.spec.ts @@ -743,6 +743,18 @@ g.test('out_of_bounds_array') return true; }) ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.baseLevel !== 0, + 'view base array layer must equal 0 in compatibility mode' + ); + t.skipIf( + t.params.arrayLevels !== kArrayLevels, + 'view array layers must equal texture array layers in compatibility mode' + ); + } + }) .fn(t => { const dim = '2d'; const view_dim = '2d-array'; diff --git a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts index e997833a137f..b01f3a5e758f 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts @@ -1,18 +1,18 @@ import { keysOf } from '../../../../../../common/util/data_tables.js'; import { assert, range, unreachable } from '../../../../../../common/util/util.js'; +import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; import { EncodableTextureFormat, isCompressedFloatTextureFormat, isCompressedTextureFormat, isDepthOrStencilTextureFormat, + isDepthTextureFormat, + isEncodableTextureFormat, + isStencilTextureFormat, kEncodableTextureFormats, kTextureFormatInfo, } from '../../../../../format_info.js'; -import { - GPUTest, - GPUTestSubcaseBatchState, - TextureTestMixinType, -} from '../../../../../gpu_test.js'; +import { GPUTest, GPUTestSubcaseBatchState } from '../../../../../gpu_test.js'; import { 
align, clamp, @@ -24,6 +24,7 @@ import { } from '../../../../../util/math.js'; import { effectiveViewDimensionForDimension, + physicalMipSize, physicalMipSizeFromTexture, reifyTextureDescriptor, SampleCoord, @@ -37,11 +38,28 @@ import { TexelComponent, TexelRepresentationInfo, } from '../../../../../util/texture/texel_data.js'; -import { TexelView } from '../../../../../util/texture/texel_view.js'; +import { PerPixelAtLevel, TexelView } from '../../../../../util/texture/texel_view.js'; import { createTextureFromTexelViews } from '../../../../../util/texture.js'; import { reifyExtent3D } from '../../../../../util/unions.js'; +import { ShaderStage } from '../../../../validation/decl/util.js'; -export type SampledType = 'f32' | 'i32' | 'u32'; +// These are needed because the list of parameters was too long when converted to a filename. +export const kShortShaderStageToShaderStage = { + c: 'compute' as ShaderStage, + f: 'fragment' as ShaderStage, + v: 'vertex' as ShaderStage, +} as const; +export const kShortShaderStages = keysOf(kShortShaderStageToShaderStage); +export type ShortShaderStage = (typeof kShortShaderStages)[number]; + +// These are needed because the list of parameters was too long when converted to a filename. +export const kShortAddressModeToAddressMode: Record = { + c: 'clamp-to-edge', + r: 'repeat', + m: 'mirror-repeat', +}; + +export const kShortAddressModes = keysOf(kShortAddressModeToAddressMode); export const kSampleTypeInfo = { f32: { @@ -55,6 +73,17 @@ export const kSampleTypeInfo = { }, } as const; +// MAINTENANCE_TODO: Stop excluding sliced compressed 3d formats. +export function isSupportedViewFormatCombo( + format: GPUTextureFormat, + viewDimension: GPUTextureViewDimension +) { + return !( + (isCompressedTextureFormat(format) || isDepthTextureFormat(format)) && + viewDimension === '3d' + ); +} + /** * Return the texture type for a given view dimension */ @@ -77,16 +106,68 @@ export function getTextureTypeForTextureViewDimension(viewDimension: GPUTextureV } } +const is32Float = (format: GPUTextureFormat) => + format === 'r32float' || format === 'rg32float' || format === 'rgba32float'; + /** - * Returns if a texture format can potentially be filtered and can be filled with random data. + * Skips a subcase if the filter === 'linear' and the format is type + * 'unfilterable-float' and we cannot enable filtering. 
*/ -export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) { - const type = kTextureFormatInfo[format].color?.type; - const canPotentiallyFilter = type === 'float' || type === 'unfilterable-float'; +export function skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice( + t: GPUTestSubcaseBatchState, + filter: GPUFilterMode, + format: GPUTextureFormat +) { + const features = new Set(); + features.add(kTextureFormatInfo[format].feature); + + if (filter === 'linear') { + t.skipIf(isDepthTextureFormat(format), 'depth texture are unfilterable'); + + const type = kTextureFormatInfo[format].color?.type; + if (type === 'unfilterable-float') { + assert(is32Float(format)); + features.add('float32-filterable'); + } + } + + if (features.size > 0) { + t.selectDeviceOrSkipTestCase(Array.from(features)); + } +} + +/** + * Skips a test if filter === 'linear' and the format is not filterable + */ +export function skipIfNeedsFilteringAndIsUnfilterable( + t: GPUTest, + filter: GPUFilterMode, + format: GPUTextureFormat +) { + if (filter === 'linear') { + t.skipIf(isDepthTextureFormat(format), 'depth textures are unfilterable'); + } +} + +/** + * Returns if a texture format can be filled with random data. + */ +export function isFillable(format: GPUTextureFormat) { // We can't easily put random bytes into compressed textures if they are float formats // since we want the range to be +/- 1000 and not +/- infinity or NaN. - const isFillable = !isCompressedTextureFormat(format) || !format.endsWith('float'); - return canPotentiallyFilter && isFillable; + return !isCompressedTextureFormat(format) || !format.endsWith('float'); +} + +/** + * Returns if a texture format can potentially be filtered and can be filled with random data. + */ +export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) { + const info = kTextureFormatInfo[format]; + const type = info.color?.type ?? info.depth?.type; + const canPotentiallyFilter = + type === 'float' || type === 'unfilterable-float' || type === 'depth'; + const result = canPotentiallyFilter && isFillable(format); + return result; } /** @@ -106,77 +187,380 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable( } /** - * Gets the mip gradient values for the current device. - * The issue is, different GPUs have different ways of mixing between mip levels. - * For most GPUs it's linear but for AMD GPUs on Mac in particular, it's something - * else (which AFAICT is against all the specs). 
+ * Splits in array into multiple arrays where every Nth value goes to a different array + */ +function unzip(array: T[], num: number) { + const arrays: T[][] = range(num, () => []); + array.forEach((v, i) => { + arrays[i % num].push(v); + }); + return arrays; +} + +type MipWeights = { + sampleLevelWeights?: number[]; + softwareMixToGPUMixGradWeights?: number[]; +}; +type MipWeightType = keyof MipWeights; + +function makeGraph(width: number, height: number) { + const data = new Uint8Array(width * height); + + return { + plot(norm: number, x: number, c: number) { + const y = clamp(Math.floor(norm * height), { min: 0, max: height - 1 }); + const offset = (height - y - 1) * width + x; + data[offset] = c; + }, + plotValues(values: Iterable, c: number) { + let i = 0; + for (const v of values) { + this.plot(v, i, c); + ++i; + } + }, + toString(conversion = ['.', 'e', 'A']) { + const lines = []; + for (let y = 0; y < height; ++y) { + const offset = y * width; + lines.push([...data.subarray(offset, offset + width)].map(v => conversion[v]).join('')); + } + return lines.join('\n'); + }, + }; +} + +function* linear0to1OverN(n: number) { + for (let i = 0; i <= n; ++i) { + yield i / n; + } +} + +function graphWeights(height: number, weights: number[]) { + const graph = makeGraph(weights.length, height); + graph.plotValues(linear0to1OverN(weights.length - 1), 1); + graph.plotValues(weights, 2); + return graph.toString(); +} + +/** + * Validates the weights go from 0 to 1 in increasing order. + */ +function validateWeights(stage: string, weights: number[]) { + const showWeights = () => ` +${weights.map((v, i) => `${i.toString().padStart(2)}: ${v}`).join('\n')} + +e = expected +A = actual +${graphWeights(32, weights)} +`; + + // Validate the weights + assert( + weights[0] === 0, + `stage: ${stage}, weight 0 expected 0 but was ${weights[0]}\n${showWeights()}` + ); + assert( + weights[kMipGradientSteps] === 1, + `stage: ${stage}, top weight expected 1 but was ${weights[kMipGradientSteps]}\n${showWeights()}` + ); + + // Note: for 16 steps, these are the AMD weights + // + // standard + // step mipLevel gpu AMD + // ---- -------- -------- ---------- + // 0: 0 0 0 + // 1: 0.0625 0.0625 0 + // 2: 0.125 0.125 0.03125 + // 3: 0.1875 0.1875 0.109375 + // 4: 0.25 0.25 0.1875 + // 5: 0.3125 0.3125 0.265625 + // 6: 0.375 0.375 0.34375 + // 7: 0.4375 0.4375 0.421875 + // 8: 0.5 0.5 0.5 + // 9: 0.5625 0.5625 0.578125 + // 10: 0.625 0.625 0.65625 + // 11: 0.6875 0.6875 0.734375 + // 12: 0.75 0.75 0.8125 + // 13: 0.8125 0.8125 0.890625 + // 14: 0.875 0.875 0.96875 + // 15: 0.9375 0.9375 1 + // 16: 1 1 1 + // + // notice step 1 is 0 and step 15 is 1. + // so we only check the 1 through 14. + // + // Note: these 2 changes are effectively here to catch Intel Mac + // issues and require implementations to work around them. 
+ // + // Ideally the weights should form a straight line + // + // +----------------+ + // | **| + // | ** | + // | ** | + // | ** | + // | ** | + // | ** | + // | ** | + // |** | + // +----------------+ + // + // AMD Mac goes like this: Not great but we allow it + // + // +----------------+ + // | ***| + // | ** | + // | * | + // | ** | + // | ** | + // | * | + // | ** | + // |*** | + // +----------------+ + // + // Intel Mac goes like this: Unacceptable + // + // +----------------+ + // | *******| + // | * | + // | * | + // | * | + // | * | + // | * | + // | * | + // |******* | + // +----------------+ + // + const dx = 1 / kMipGradientSteps; + for (let i = 0; i < kMipGradientSteps; ++i) { + const dy = weights[i + 1] - weights[i]; + // dy / dx because dy might be 0 + const slope = dy / dx; + assert( + slope >= 0, + `stage: ${stage}, weight[${i}] was not <= weight[${i + 1}]\n${showWeights()}` + ); + assert( + slope <= 2, + `stage: ${stage}, slope from weight[${i}] to weight[${i + 1}] is > 2.\n${showWeights()}` + ); + } + + assert( + new Set(weights).size >= ((weights.length * 0.66) | 0), + `stage: ${stage}, expected more unique weights\n${showWeights()}` + ); +} + +/** + * In an attempt to pass on more devices without lowering the tolerances + * so low they are meaningless, we ask the hardware to tell us, for a given + * gradient, level, what mix weights are being used. * - * We seemingly have 3 options: + * This is done by drawing instanced quads and using instance_index to + * write out results into an array. We sample a 2x2 pixel texture with + * 2 mip levels and set the 2nd mip level to white. This means the value + * we get back represents the weight used to mix the 2 mip levels. * - * 1. Increase the tolerances of tests so they pass on AMD. - * 2. Mark AMD as failing - * 3. Try to figure out how the GPU converts mip levels into weights + * Just as a record of some differences across GPUs * - * We're doing 3. + * level weights: mapping from the mip level + * parameter of `textureSampleLevel` to + * the mix weight used by the GPU * - * There's an assumption that the gradient will be the same for all formats - * and usages. 
+ * +--------+--------+--------+--------+ + * | | | intel | amd | + * | | m1 | gen-9 | rna-1 | + * | level | mac | mac | mac | + * +--------+--------+--------+--------+ + * | 0.0000 | 0.0000 | 0.0000 | 0.0000 | + * | 0.0313 | 0.0314 | 0.0313 | 0.0000 | + * | 0.0625 | 0.0625 | 0.0625 | 0.0000 | + * | 0.0938 | 0.0939 | 0.0938 | 0.0000 | + * | 0.1250 | 0.1250 | 0.1250 | 0.0313 | + * | 0.1563 | 0.1564 | 0.1563 | 0.0703 | + * | 0.1875 | 0.1875 | 0.1875 | 0.1094 | + * | 0.2188 | 0.2189 | 0.2188 | 0.1484 | + * | 0.2500 | 0.2500 | 0.2500 | 0.1875 | + * | 0.2813 | 0.2814 | 0.2813 | 0.2266 | + * | 0.3125 | 0.3125 | 0.3125 | 0.2656 | + * | 0.3438 | 0.3439 | 0.3438 | 0.3047 | + * | 0.3750 | 0.3750 | 0.3750 | 0.3438 | + * | 0.4063 | 0.4064 | 0.4063 | 0.3828 | + * | 0.4375 | 0.4375 | 0.4375 | 0.4219 | + * | 0.4688 | 0.4689 | 0.4688 | 0.4609 | + * | 0.5000 | 0.5000 | 0.5000 | 0.5000 | + * | 0.5313 | 0.5314 | 0.5313 | 0.5391 | + * | 0.5625 | 0.5625 | 0.5625 | 0.5781 | + * | 0.5938 | 0.5939 | 0.5938 | 0.6172 | + * | 0.6250 | 0.6250 | 0.6250 | 0.6563 | + * | 0.6563 | 0.6564 | 0.6563 | 0.6953 | + * | 0.6875 | 0.6875 | 0.6875 | 0.7344 | + * | 0.7188 | 0.7189 | 0.7188 | 0.7734 | + * | 0.7500 | 0.7500 | 0.7500 | 0.8125 | + * | 0.7813 | 0.7814 | 0.7813 | 0.8516 | + * | 0.8125 | 0.8125 | 0.8125 | 0.8906 | + * | 0.8438 | 0.8439 | 0.8438 | 0.9297 | + * | 0.8750 | 0.8750 | 0.8750 | 0.9688 | + * | 0.9063 | 0.9064 | 0.9063 | 1.0000 | + * | 0.9375 | 0.9375 | 0.9375 | 1.0000 | + * | 0.9688 | 0.9689 | 0.9688 | 1.0000 | + * | 1.0000 | 1.0000 | 1.0000 | 1.0000 | + * +--------+--------+--------+--------+ + * + * grad weights: mapping from ddx value + * passed into `textureSampleGrad` to + * the mix weight used by the GPU + * + * +--------+--------+--------+--------+ + * | | | intel | amd | + * | | m1 | gen-9 | rna-1 | + * | ddx | mac | mac | mac | + * +--------+--------+--------+--------+ + * | 0.5000 | 0.0000 | 0.0000 | 0.0000 | + * | 0.5109 | 0.0390 | 0.0430 | 0.0000 | + * | 0.5221 | 0.0821 | 0.0859 | 0.0000 | + * | 0.5336 | 0.1211 | 0.1289 | 0.0352 | + * | 0.5453 | 0.1600 | 0.1719 | 0.0898 | + * | 0.5572 | 0.2032 | 0.2109 | 0.1328 | + * | 0.5694 | 0.2422 | 0.2461 | 0.1797 | + * | 0.5819 | 0.2814 | 0.2852 | 0.2305 | + * | 0.5946 | 0.3203 | 0.3203 | 0.2773 | + * | 0.6076 | 0.3554 | 0.3594 | 0.3164 | + * | 0.6209 | 0.3868 | 0.3906 | 0.3633 | + * | 0.6345 | 0.4218 | 0.4258 | 0.4063 | + * | 0.6484 | 0.4532 | 0.4609 | 0.4492 | + * | 0.6626 | 0.4882 | 0.4922 | 0.4883 | + * | 0.6771 | 0.5196 | 0.5234 | 0.5273 | + * | 0.6920 | 0.5507 | 0.5547 | 0.5664 | + * | 0.7071 | 0.5860 | 0.5859 | 0.6055 | + * | 0.7226 | 0.6132 | 0.6133 | 0.6406 | + * | 0.7384 | 0.6407 | 0.6445 | 0.6797 | + * | 0.7546 | 0.6679 | 0.6719 | 0.7148 | + * | 0.7711 | 0.6953 | 0.6992 | 0.7461 | + * | 0.7880 | 0.7225 | 0.7266 | 0.7813 | + * | 0.8052 | 0.7500 | 0.7539 | 0.8164 | + * | 0.8229 | 0.7814 | 0.7813 | 0.8516 | + * | 0.8409 | 0.8086 | 0.8086 | 0.8828 | + * | 0.8593 | 0.8321 | 0.8320 | 0.9141 | + * | 0.8781 | 0.8554 | 0.8594 | 0.9492 | + * | 0.8974 | 0.8789 | 0.8828 | 0.9766 | + * | 0.9170 | 0.9025 | 0.9063 | 1.0000 | + * | 0.9371 | 0.9297 | 0.9297 | 1.0000 | + * | 0.9576 | 0.9532 | 0.9531 | 1.0000 | + * | 0.9786 | 0.9765 | 0.9766 | 1.0000 | + * | 1.0000 | 1.0000 | 1.0000 | 1.0000 | + * +--------+--------+--------+--------+ */ -const kMipGradientSteps = 16; -const s_deviceToMipGradientValues = new WeakMap(); -async function initMipGradientValuesForDevice(t: GPUTest) { + +async function queryMipGradientValuesForDevice(t: GPUTest, stage: ShaderStage) { const { 
device } = t; - const weights = s_deviceToMipGradientValues.get(device); - if (!weights) { - const module = device.createShaderModule({ - code: ` - @group(0) @binding(0) var tex: texture_2d; - @group(0) @binding(1) var smp: sampler; - @group(0) @binding(2) var result: array; + const kNumWeightTypes = 2; + const module = device.createShaderModule({ + code: ` + @group(0) @binding(0) var tex: texture_2d; + @group(0) @binding(1) var smp: sampler; + @group(0) @binding(2) var result: array; + + struct VSOutput { + @builtin(position) pos: vec4f, + @location(0) @interpolate(flat, either) ndx: u32, + @location(1) @interpolate(flat, either) result: vec4f, + }; - @compute @workgroup_size(1) fn cs(@builtin(global_invocation_id) id: vec3u) { - let mipLevel = f32(id.x) / ${kMipGradientSteps}; - result[id.x] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r; + fn getMixLevels(wNdx: u32) -> vec4f { + let mipLevel = f32(wNdx) / ${kMipGradientSteps}; + let size = textureDimensions(tex); + let g = mix(1.0, 2.0, mipLevel) / f32(size.x); + let ddx = vec2f(g, 0); + return vec4f( + textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r, + textureSampleGrad(tex, smp, vec2f(0.5), ddx, vec2f(0)).r, + 0, + 0); + } + + fn recordMixLevels(wNdx: u32, r: vec4f) { + let ndx = wNdx * ${kNumWeightTypes}; + for (var i: u32 = 0; i < ${kNumWeightTypes}; i++) { + result[ndx + i] = r[i]; } - `, - }); + } - const pipeline = device.createComputePipeline({ - layout: 'auto', - compute: { module }, - }); + fn getPosition(vNdx: u32) -> vec4f { + let pos = array( + vec2f(-1, 3), + vec2f( 3, -1), + vec2f(-1, -1), + ); + let p = pos[vNdx]; + return vec4f(p, 0, 1); + } - const texture = t.createTextureTracked({ - size: [2, 2, 1], - format: 'r8unorm', - usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, - mipLevelCount: 2, - }); + @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { + return VSOutput(getPosition(vNdx), iNdx, vec4f(0)); + } - device.queue.writeTexture( - { texture, mipLevel: 1 }, - new Uint8Array([255]), - { bytesPerRow: 1 }, - [1, 1] - ); + @fragment fn fsRecord(v: VSOutput) -> @location(0) vec4f { + recordMixLevels(v.ndx, getMixLevels(v.ndx)); + return vec4f(0); + } - const sampler = device.createSampler({ - minFilter: 'linear', - magFilter: 'linear', - mipmapFilter: 'linear', - }); + @compute @workgroup_size(1) fn csRecord(@builtin(global_invocation_id) id: vec3u) { + recordMixLevels(id.x, getMixLevels(id.x)); + } - const storageBuffer = t.createBufferTracked({ - size: 4 * (kMipGradientSteps + 1), - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); + @vertex fn vsRecord(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { + return VSOutput(getPosition(vNdx), iNdx, getMixLevels(iNdx)); + } - const resultBuffer = t.createBufferTracked({ - size: storageBuffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); + @fragment fn fsSaveVs(v: VSOutput) -> @location(0) vec4f { + recordMixLevels(v.ndx, v.result); + return vec4f(0); + } + `, + }); - const bindGroup = device.createBindGroup({ + const texture = t.createTextureTracked({ + size: [2, 2, 1], + format: 'r8unorm', + usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, + mipLevelCount: 2, + }); + + device.queue.writeTexture( + { texture, mipLevel: 1 }, + new Uint8Array([255]), + { bytesPerRow: 1 }, + [1, 1] + ); + + const sampler = device.createSampler({ + minFilter: 'linear', + magFilter: 'linear', + mipmapFilter: 'linear', + }); + + 
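Mip level 0 of the 2x2 r8unorm texture is left zero-initialized while mip level 1 is written to 1.0, so the value sampled back is mix(0, 1, w) = w, i.e. the GPU's own mix weight. Each step records one `textureSampleLevel` weight and one `textureSampleGrad` weight back to back, so the readback interleaves the two series; a minimal sketch of how the `unzip` helper defined earlier in this file separates them, using a hypothetical 3-step result:

// Readback layout per step i: [levelWeight_i, gradWeight_i]; kNumWeightTypes === 2.
function unzip<T>(array: T[], num: number): T[][] {
  const arrays: T[][] = Array.from({ length: num }, () => []);
  array.forEach((v, i) => arrays[i % num].push(v));
  return arrays;
}

const readback = [0, 0, 0.5, 0.47, 1, 1]; // hypothetical values for 3 steps
const [sampleLevelWeights, gradWeights] = unzip(readback, 2);
// sampleLevelWeights -> [0, 0.5, 1]
// gradWeights        -> [0, 0.47, 1]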
const target = t.createTextureTracked({ + size: [1, 1], + format: 'rgba8unorm', + usage: GPUTextureUsage.RENDER_ATTACHMENT, + }); + + const storageBuffer = t.createBufferTracked({ + size: 4 * (kMipGradientSteps + 1) * kNumWeightTypes, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const resultBuffer = t.createBufferTracked({ + size: storageBuffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); + + const createBindGroup = (pipeline: GPUComputePipeline | GPURenderPipeline) => + device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), entries: [ { binding: 0, resource: texture.createView() }, @@ -185,79 +569,268 @@ async function initMipGradientValuesForDevice(t: GPUTest) { ], }); - const encoder = device.createCommandEncoder(); - const pass = encoder.beginComputePass(); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(kMipGradientSteps + 1); - pass.end(); - encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); - device.queue.submit([encoder.finish()]); + const encoder = device.createCommandEncoder(); + switch (stage) { + case 'compute': { + const pipeline = device.createComputePipeline({ + layout: 'auto', + compute: { module }, + }); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.dispatchWorkgroups(kMipGradientSteps + 1); + pass.end(); + break; + } + case 'fragment': { + const pipeline = device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vs' }, + fragment: { module, entryPoint: 'fsRecord', targets: [{ format: 'rgba8unorm' }] }, + }); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: target.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.draw(3, kMipGradientSteps + 1); + pass.end(); + break; + } + case 'vertex': { + const pipeline = device.createRenderPipeline({ + layout: 'auto', + vertex: { module, entryPoint: 'vsRecord' }, + fragment: { module, entryPoint: 'fsSaveVs', targets: [{ format: 'rgba8unorm' }] }, + }); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: target.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, createBindGroup(pipeline)); + pass.draw(3, kMipGradientSteps + 1); + pass.end(); + break; + } + } + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); + device.queue.submit([encoder.finish()]); - await resultBuffer.mapAsync(GPUMapMode.READ); - const weights = Array.from(new Float32Array(resultBuffer.getMappedRange())); - resultBuffer.unmap(); + await resultBuffer.mapAsync(GPUMapMode.READ); + const result = Array.from(new Float32Array(resultBuffer.getMappedRange())); + resultBuffer.unmap(); + resultBuffer.destroy(); - texture.destroy(); - storageBuffer.destroy(); - resultBuffer.destroy(); + const [sampleLevelWeights, gradWeights] = unzip(result, kNumWeightTypes); + + validateWeights(stage, sampleLevelWeights); + validateWeights(stage, gradWeights); + + texture.destroy(); + storageBuffer.destroy(); + + return { + sampleLevelWeights, + softwareMixToGPUMixGradWeights: generateSoftwareMixToGPUMixGradWeights(gradWeights, texture), + }; +} - // Validate the weights - assert(weights[0] === 0); - assert(weights[kMipGradientSteps] === 1); - assert(weights[kMipGradientSteps / 2] === 0.5); - - // 
Note: for 16 steps, these are the AMD weights - // - // standard - // step mipLevel gpu AMD - // ---- -------- -------- ---------- - // 0: 0 0 1 - // 1: 0.0625 0.0625 0 - // 2: 0.125 0.125 0.03125 - // 3: 0.1875 0.1875 0.109375 - // 4: 0.25 0.25 0.1875 - // 5: 0.3125 0.3125 0.265625 - // 6: 0.375 0.375 0.34375 - // 7: 0.4375 0.4375 0.421875 - // 8: 0.5 0.5 0.5 - // 9: 0.5625 0.5625 0.578125 - // 10: 0.625 0.625 0.65625 - // 11: 0.6875 0.6875 0.734375 - // 12: 0.75 0.75 0.8125 - // 13: 0.8125 0.8125 0.890625 - // 14: 0.875 0.875 0.96875 - // 15: 0.9375 0.9375 1 - // 16: 1 1 1 - // - // notice step 1 is 0 and step 15 is 1. - // so we only check the 1 through 14. - for (let i = 1; i < kMipGradientSteps - 1; ++i) { - assert(weights[i] < weights[i + 1]); +// Given an array of ascending values and a value v, finds +// which indices in the array v is between. Returns the lower +// index and the mix weight between the 2 indices for v. +// +// In other words, if values = [10, 20, 30, 40, 50] +// +// getIndexAndWeight(values, 38) -> [2, 0.8] +// +// Example: +// +// values = [10, 20, 30, 40, 50] +// v = 38 +// [ndx, weight] = getIndexAndWeight(values, v); +// v2 = lerp(values[ndx], values[ndx + 1], weight); +// assert(v === v2) +function getIndexAndWeight(values: readonly number[], v: number) { + assert(v >= values[0] && v <= values[values.length - 1]); + let lo = 0; + let hi = values.length - 1; + for (;;) { + const i = (lo + (hi - lo) / 2) | 0; + const w0 = values[i]; + const w1 = values[i + 1]; + if (lo === hi || (v >= w0 && v <= w1)) { + const weight = (v - w0) / (w1 - w0); + return [i, weight]; } + if (v < w0) { + hi = i; + } else { + lo = i + 1; + } + } +} + +/** + * Given a fractional number between 0 and values.length returns the value between + * 2 values. Effectively lerp(values[ndx], values[ndx + 1], weight) + */ +function bilinearFilter(values: readonly number[], ndx: number, weight: number) { + const v0 = values[ndx]; + const v1 = values[ndx + 1] ?? 0; + assert(ndx < values.length - 1 || (ndx === values.length - 1 && weight === 0)); + return lerp(v0, v1, weight); +} + +/** + * Generates an array of values that maps between the software renderer's gradient computed + * mip level and the GPUs gradient computed mip level for mip level 0 to 1. + */ +function generateSoftwareMixToGPUMixGradWeights(gpuWeights: number[], texture: GPUTexture) { + const numSteps = gpuWeights.length - 1; + const size = [texture.width, texture.height, texture.depthOrArrayLayers]; + const softwareWeights = range(numSteps + 1, i => { + // u goes from 0 to 1 + const u = i / numSteps; + const g = lerp(1, 2, u) / texture.width; + const mipLevel = computeMipLevelFromGradients([g], [0], size); + assert(mipLevel >= 0 && mipLevel <= 1); + return mipLevel; + }); + const softwareMixToGPUMixMap = range(numSteps + 1, i => { + const mix = i / numSteps; + const [ndx, weight] = getIndexAndWeight(softwareWeights, mix); + return bilinearFilter(gpuWeights, ndx, weight); + }); + return softwareMixToGPUMixMap; +} + +function mapSoftwareMipLevelToGPUMipLevel(t: GPUTest, stage: ShaderStage, mipLevel: number) { + const baseLevel = Math.floor(mipLevel); + const softwareMix = mipLevel - baseLevel; + const gpuMix = getMixWeightByTypeForMipLevel( + t, + stage, + 'softwareMixToGPUMixGradWeights', + softwareMix + ); + return baseLevel + gpuMix; +} + +const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m; - s_deviceToMipGradientValues.set(device, weights); +/** + * Gets the mip gradient values for the current device. 
+ * The issue is, different GPUs have different ways of mixing between mip levels. + * For most GPUs it's linear but for AMD GPUs on Mac in particular, it's something + * else (which AFAICT is against all the specs). + * + * We seemingly have 3 options: + * + * 1. Increase the tolerances of tests so they pass on AMD. + * 2. Mark AMD as failing + * 3. Try to figure out how the GPU converts mip levels into weights + * + * We're doing 3. + * + * There's an assumption that the gradient will be the same for all formats + * and usages. + * + * Note: The code below has 2 maps. One device->Promise, the other device->weights + * device->weights is meant to be used synchronously by other code so we don't + * want to leave initMipGradientValuesForDevice until the weights have been read. + * But, multiple subcases will run because this function is async. So, subcase 1 + * runs, hits this init code, this code waits for the weights. Then, subcase 2 + * runs and hits this init code. The weights will not be in the device->weights map + * yet which is why we have the device->Promise map. This is so subcase 2 waits + * for subcase 1's "query the weights" step. Otherwise, all subcases would do the + * "get the weights" step separately. + */ +const kMipGradientSteps = 64; +const s_deviceToMipGradientValuesPromise = new WeakMap< + GPUDevice, + Record> +>(); +const s_deviceToMipGradientValues = new WeakMap>(); + +async function initMipGradientValuesForDevice(t: GPUTest, stage: ShaderStage) { + const { device } = t; + // Get the per stage promises (or make them) + const stageWeightsP = + s_deviceToMipGradientValuesPromise.get(device) ?? + ({} as Record>); + s_deviceToMipGradientValuesPromise.set(device, stageWeightsP); + + let weightsP = stageWeightsP[stage]; + if (!weightsP) { + // There was no promise for this weight so request it + // and add a then clause so the first thing that will happen + // when the promise resolves is that we'll record the weights for + // that stage. + weightsP = queryMipGradientValuesForDevice(t, stage); + weightsP + .then(weights => { + const stageWeights = + s_deviceToMipGradientValues.get(device) ?? 
({} as Record); + s_deviceToMipGradientValues.set(device, stageWeights); + stageWeights[stage] = weights; + }) + .catch(e => { + throw e; + }); + stageWeightsP[stage] = weightsP; } + return await weightsP; } -function getWeightForMipLevel(t: GPUTest, mipLevelCount: number, mipLevel: number) { - if (mipLevel < 0 || mipLevel >= mipLevelCount) { - return 1; +function getMixWeightByTypeForMipLevel( + t: GPUTest, + stage: ShaderStage, + weightType: MipWeightType | 'identity', + mipLevel: number +) { + if (weightType === 'identity') { + return euclideanModulo(mipLevel, 1); } // linear interpolate between weights - const weights = s_deviceToMipGradientValues.get(t.device); + const weights = s_deviceToMipGradientValues.get(t.device)![stage][weightType]; assert( !!weights, 'you must use WGSLTextureSampleTest or call initializeDeviceMipWeights before calling this function' ); const steps = weights.length - 1; - const w = (mipLevel % 1) * steps; + const w = euclideanModulo(mipLevel, 1) * steps; const lowerNdx = Math.floor(w); const upperNdx = Math.ceil(w); const mix = w % 1; return lerp(weights[lowerNdx], weights[upperNdx], mix); } +function getWeightForMipLevel( + t: GPUTest, + stage: ShaderStage, + weightType: MipWeightType | 'identity', + mipLevelCount: number, + mipLevel: number +) { + if (mipLevel < 0 || mipLevel >= mipLevelCount) { + return 1; + } + return getMixWeightByTypeForMipLevel(t, stage, weightType, mipLevel); +} + /** * Used for textureDimension, textureNumLevels, textureNumLayers */ @@ -305,7 +878,6 @@ export class WGSLTextureQueryTest extends GPUTest { export class WGSLTextureSampleTest extends GPUTest { override async init(): Promise { await super.init(); - await initMipGradientValuesForDevice(this); } } @@ -329,17 +901,16 @@ function getLimitValue(v: number) { } } -function getValueBetweenMinAndMaxTexelValueInclusive( +function getMinAndMaxTexelValueForComponent( rep: TexelRepresentationInfo, - component: TexelComponent, - normalized: number + component: TexelComponent ) { assert(!!rep.numericRange); const perComponentRanges = rep.numericRange as PerComponentNumericRange; const perComponentRange = perComponentRanges[component]; const range = rep.numericRange as NumericRange; const { min, max } = perComponentRange ? perComponentRange : range; - return lerp(getLimitValue(min), getLimitValue(max), normalized); + return { min: getLimitValue(min), max: getLimitValue(max) }; } /** @@ -397,16 +968,72 @@ export function appendComponentTypeForFormatToTextureType(base: string, format: : `${base}<${getTextureFormatTypeInfo(format).componentType}>`; } +type RandomTextureOptions = { + generator: PerPixelAtLevel>; +}; + /** - * Creates a TexelView filled with random values. + * Make a generator for texels for depth comparison tests. */ -export function createRandomTexelView(info: { - format: GPUTextureFormat; - size: GPUExtent3D; - mipLevel: number; -}): TexelView { +export function makeRandomDepthComparisonTexelGenerator( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + }, + comparison: GPUCompareFunction +) { const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; const size = reifyExtent3D(info.size); + + const comparisonIsEqualOrNotEqual = comparison === 'equal' || comparison === 'not-equal'; + + // for equal and not-equal we just want to test 0, 0.6, and 1 + // for everything else we want 0 to 1 + // Note: 0.6 is chosen because we'll never choose 0.6 as our depth reference + // value. 
(see generateTextureBuiltinInputsImpl and generateSamplePointsCube) + // The problem with comparing equal is other than 0.0 and 1.0, no other + // values are guaranteed to be equal. + const fixedValues = [0, 0.6, 1, 1]; + const format = comparisonIsEqualOrNotEqual + ? (norm: number) => fixedValues[(norm * (fixedValues.length - 1)) | 0] + : (norm: number) => norm; + + return (coords: SampleCoord): Readonly> => { + const texel: PerTexelComponent = {}; + for (const component of rep.componentOrder) { + const rnd = hashU32( + coords.x, + coords.y, + coords.z, + coords.sampleIndex ?? 0, + component.charCodeAt(0), + size.width, + size.height, + size.depthOrArrayLayers + ); + const normalized = clamp(rnd / 0xffffffff, { min: 0, max: 1 }); + texel[component] = format(normalized); + } + return quantize(texel, rep); + }; +} + +function createRandomTexelViewViaColors( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + }, + options?: RandomTextureOptions | undefined +): TexelView { + const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; + const size = reifyExtent3D(info.size); + const minMax = Object.fromEntries( + rep.componentOrder.map(component => [ + component, + getMinAndMaxTexelValueForComponent(rep, component), + ]) + ); const generator = (coords: SampleCoord): Readonly> => { const texel: PerTexelComponent = {}; for (const component of rep.componentOrder) { @@ -422,30 +1049,125 @@ export function createRandomTexelView(info: { size.depthOrArrayLayers ); const normalized = clamp(rnd / 0xffffffff, { min: 0, max: 1 }); - texel[component] = getValueBetweenMinAndMaxTexelValueInclusive(rep, component, normalized); + const { min, max } = minMax[component]; + texel[component] = lerp(min, max, normalized); } return quantize(texel, rep); }; - return TexelView.fromTexelsAsColors(info.format as EncodableTextureFormat, generator); + return TexelView.fromTexelsAsColors( + info.format as EncodableTextureFormat, + options?.generator ?? generator + ); +} + +function createRandomTexelViewViaBytes(info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + sampleCount: number; +}): TexelView { + const { format } = info; + const formatInfo = kTextureFormatInfo[format]; + const rep = kTexelRepresentationInfo[info.format as EncodableTextureFormat]; + assert(!!rep); + const bytesPerBlock = (formatInfo.color?.bytes ?? formatInfo.stencil?.bytes)!; + assert(bytesPerBlock > 0); + const size = physicalMipSize(reifyExtent3D(info.size), info.format, '2d', 0); + const blocksAcross = Math.ceil(size.width / formatInfo.blockWidth); + const blocksDown = Math.ceil(size.height / formatInfo.blockHeight); + const bytesPerRow = blocksAcross * bytesPerBlock * info.sampleCount; + const bytesNeeded = bytesPerRow * blocksDown * size.depthOrArrayLayers; + const data = new Uint8Array(bytesNeeded); + + const hashBase = + sumOfCharCodesOfString(info.format) + + size.width + + size.height + + size.depthOrArrayLayers + + info.mipLevel + + info.sampleCount; + + if (info.format.includes('32float') || info.format.includes('16float')) { + const { min, max } = getMinAndMaxTexelValueForComponent(rep, TexelComponent.R); + const asFloat = info.format.includes('32float') + ? 
new Float32Array(data.buffer) + : new Float16Array(data.buffer); + for (let i = 0; i < asFloat.length; ++i) { + asFloat[i] = lerp(min, max, hashU32(hashBase + i) / 0xffff_ffff); + } + } else if (bytesNeeded % 4 === 0) { + const asU32 = new Uint32Array(data.buffer); + for (let i = 0; i < asU32.length; ++i) { + asU32[i] = hashU32(hashBase + i); + } + } else { + for (let i = 0; i < bytesNeeded; ++i) { + data[i] = hashU32(hashBase + i); + } + } + + return TexelView.fromTextureDataByReference(info.format as EncodableTextureFormat, data, { + bytesPerRow, + rowsPerImage: size.height, + subrectOrigin: [0, 0, 0], + subrectSize: size, + }); +} + +/** + * Creates a TexelView filled with random values. + */ +function createRandomTexelView( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevel: number; + sampleCount: number; + }, + options?: RandomTextureOptions | undefined +): TexelView { + assert(!isCompressedTextureFormat(info.format)); + const formatInfo = kTextureFormatInfo[info.format]; + const type = formatInfo.color?.type ?? formatInfo.depth?.type ?? formatInfo.stencil?.type; + const canFillWithRandomTypedData = + !options && + isEncodableTextureFormat(info.format) && + ((info.format.includes('norm') && type !== 'depth') || + info.format.includes('16float') || + (info.format.includes('32float') && type !== 'depth') || + type === 'sint' || + type === 'uint'); + + return canFillWithRandomTypedData + ? createRandomTexelViewViaBytes(info) + : createRandomTexelViewViaColors(info, options); } /** * Creates a mip chain of TexelViews filled with random values */ -export function createRandomTexelViewMipmap(info: { - format: GPUTextureFormat; - size: GPUExtent3D; - mipLevelCount?: number; - dimension?: GPUTextureDimension; -}): TexelView[] { +function createRandomTexelViewMipmap( + info: { + format: GPUTextureFormat; + size: GPUExtent3D; + mipLevelCount?: number; + dimension?: GPUTextureDimension; + sampleCount?: number; + }, + options?: RandomTextureOptions | undefined +): TexelView[] { const mipLevelCount = info.mipLevelCount ?? 1; const dimension = info.dimension ?? '2d'; return range(mipLevelCount, i => - createRandomTexelView({ - format: info.format, - size: virtualMipSize(dimension, info.size, i), - mipLevel: i, - }) + createRandomTexelView( + { + format: info.format, + size: virtualMipSize(dimension, info.size, i), + mipLevel: i, + sampleCount: info.sampleCount ?? 1, + }, + options + ) ); } @@ -457,33 +1179,70 @@ export type Dimensionality = vec1 | vec2 | vec3; type TextureCallArgKeys = keyof TextureCallArgs; const kTextureCallArgNames: readonly TextureCallArgKeys[] = [ + 'component', 'coords', + 'derivativeMult', // NOTE: derivativeMult not an argument but is used with coords for implicit derivatives. 
'arrayIndex', + 'bias', 'sampleIndex', 'mipLevel', 'ddx', 'ddy', + 'depthRef', 'offset', ] as const; export interface TextureCallArgs { - coords?: T; + component?: number; // Used by textureGather + coords?: T; // The coord passed + derivativeMult?: T; mipLevel?: number; arrayIndex?: number; + bias?: number; sampleIndex?: number; + depthRef?: number; ddx?: T; ddy?: T; offset?: T; } +export type TextureBuiltin = + | 'textureGather' + | 'textureGatherCompare' + | 'textureLoad' + | 'textureSample' + | 'textureSampleBaseClampToEdge' + | 'textureSampleBias' + | 'textureSampleCompare' + | 'textureSampleCompareLevel' + | 'textureSampleGrad' + | 'textureSampleLevel'; + export interface TextureCall extends TextureCallArgs { - builtin: 'textureLoad' | 'textureSample' | 'textureSampleBaseClampToEdge' | 'textureSampleLevel'; + builtin: TextureBuiltin; coordType: 'f' | 'i' | 'u'; levelType?: 'i' | 'u' | 'f'; arrayIndexType?: 'i' | 'u'; sampleIndexType?: 'i' | 'u'; + componentType?: 'i' | 'u'; } +const isBuiltinComparison = (builtin: TextureBuiltin) => + builtin === 'textureGatherCompare' || + builtin === 'textureSampleCompare' || + builtin === 'textureSampleCompareLevel'; +const isBuiltinGather = (builtin: TextureBuiltin | undefined) => + builtin === 'textureGather' || builtin === 'textureGatherCompare'; +const builtinNeedsSampler = (builtin: TextureBuiltin) => + builtin.startsWith('textureSample') || builtin.startsWith('textureGather'); +const builtinNeedsDerivatives = (builtin: TextureBuiltin) => + builtin === 'textureSample' || + builtin === 'textureSampleBias' || + builtin === 'textureSampleCompare'; + +const isCubeViewDimension = (viewDescriptor?: GPUTextureViewDescriptor) => + viewDescriptor?.dimension === 'cube' || viewDescriptor?.dimension === 'cube-array'; + const s_u32 = new Uint32Array(1); const s_f32 = new Float32Array(s_u32.buffer); const s_i32 = new Int32Array(s_u32.buffer); @@ -511,7 +1270,11 @@ function getCallArgType( ) { switch (argName) { case 'coords': + case 'derivativeMult': return call.coordType; + case 'component': + assert(call.componentType !== undefined); + return call.componentType; case 'mipLevel': assert(call.levelType !== undefined); return call.levelType; @@ -521,6 +1284,8 @@ function getCallArgType( case 'sampleIndex': assert(call.sampleIndexType !== undefined); return call.sampleIndexType; + case 'bias': + case 'depthRef': case 'ddx': case 'ddy': return 'f'; @@ -609,6 +1374,35 @@ function convertPerTexelComponentToResultFormat( return out; } +/** + * Convert RGBA result format to texel view format. + * Example, converts + * { R: 0.1, G: 0, B: 0, A: 1 } to { Depth: 0.1 } + * { R: 0.1 } to { R: 0.1, G: 0, B: 0, A: 1 } + */ +function convertToTexelViewFormat(src: PerTexelComponent, format: GPUTextureFormat) { + const componentOrder = isDepthTextureFormat(format) + ? [TexelComponent.Depth] + : isStencilTextureFormat(format) + ? [TexelComponent.Stencil] + : [TexelComponent.R, TexelComponent.G, TexelComponent.B, TexelComponent.A]; + const out: PerTexelComponent = {}; + for (const component of componentOrder) { + let v = src[component]; + if (v === undefined) { + if (component === 'Depth' || component === 'Stencil') { + v = src.R; + } else if (component === 'G' || component === 'B') { + v = 0; + } else { + v = 1; + } + } + out[component] = v; + } + return out; +} + /** * Convert RGBA result format to texel view format of src texture. 
* Effectively this converts something like { R: 0.1, G: 0, B: 0, A: 1 } @@ -631,14 +1425,45 @@ function zeroValuePerTexelComponent(components: TexelComponent[]) { for (const component of components) { out[component] = 0; } - return out; + return out; +} + +const kSamplerFns: Record boolean> = { + never: (ref: number, v: number) => false, + less: (ref: number, v: number) => ref < v, + equal: (ref: number, v: number) => ref === v, + 'less-equal': (ref: number, v: number) => ref <= v, + greater: (ref: number, v: number) => ref > v, + 'not-equal': (ref: number, v: number) => ref !== v, + 'greater-equal': (ref: number, v: number) => ref >= v, + always: (ref: number, v: number) => true, +} as const; + +function applyCompare( + call: TextureCall, + sampler: GPUSamplerDescriptor | undefined, + components: TexelComponent[], + src: PerTexelComponent +): PerTexelComponent { + if (isBuiltinComparison(call.builtin)) { + assert(sampler !== undefined); + assert(call.depthRef !== undefined); + const out: PerTexelComponent = {}; + const compareFn = kSamplerFns[sampler.compare!]; + for (const component of components) { + out[component] = compareFn(call.depthRef, src[component]!) ? 1 : 0; + } + return out; + } else { + return src; + } } /** * Returns the expect value for a WGSL builtin texture function for a single * mip level */ -export function softwareTextureReadMipLevel( +function softwareTextureReadMipLevel( call: TextureCall, texture: Texture, sampler: GPUSamplerDescriptor | undefined, @@ -661,10 +1486,7 @@ export function softwareTextureReadMipLevel( sampler?.addressModeW ?? 'clamp-to-edge', ]; - const isCube = - texture.viewDescriptor.dimension === 'cube' || - texture.viewDescriptor.dimension === 'cube-array'; - + const isCube = isCubeViewDimension(texture.viewDescriptor); const arrayIndexMult = isCube ? 6 : 1; const numLayers = textureSize[2] / arrayIndexMult; assert(numLayers % 1 === 0); @@ -684,8 +1506,14 @@ export function softwareTextureReadMipLevel( }; switch (call.builtin) { + case 'textureGather': + case 'textureGatherCompare': case 'textureSample': + case 'textureSampleBias': case 'textureSampleBaseClampToEdge': + case 'textureSampleCompare': + case 'textureSampleCompareLevel': + case 'textureSampleGrad': case 'textureSampleLevel': { let coords = toArray(call.coords!); @@ -714,7 +1542,7 @@ export function softwareTextureReadMipLevel( const samples: { at: number[]; weight: number }[] = []; - const filter = sampler?.minFilter ?? 'nearest'; + const filter = isBuiltinGather(call.builtin) ? 'linear' : sampler?.minFilter ?? 'nearest'; switch (filter) { case 'linear': { // 'p0' is the lower texel for 'at' @@ -733,10 +1561,11 @@ export function softwareTextureReadMipLevel( samples.push({ at: p1, weight: p1W[0] }); break; case 2: { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); break; } case 3: { @@ -746,10 +1575,11 @@ export function softwareTextureReadMipLevel( // the slice they'll be wrapped by wrapFaceCoordToCubeFaceAtEdgeBoundaries // below. 
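The four samples above are pushed in `textureGather` order: (p0.x, p1.y), (p1.x, p1.y), (p1.x, p0.y), (p0.x, p0.y). For reference, a minimal sketch of the standard bilinear weights those samples carry, where `tx` and `ty` stand for the fractional weights toward p1 (p1W in the code above); this is an assumed restatement for illustration, not CTS code:

function bilinearWeightsInGatherOrder(tx: number, ty: number) {
  const w00 = (1 - tx) * (1 - ty); // texel at (p0.x, p0.y)
  const w10 = tx * (1 - ty);       // texel at (p1.x, p0.y)
  const w01 = (1 - tx) * ty;       // texel at (p0.x, p1.y)
  const w11 = tx * ty;             // texel at (p1.x, p1.y)
  // Same order the samples are pushed above; the four weights always sum to 1.
  return [w01, w11, w10, w00];
}

bilinearWeightsInGatherOrder(0.25, 0.5); // [0.375, 0.125, 0.125, 0.375]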
if (isCube) { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1], p0[2]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); const ndx = getUnusedCubeCornerSampleIndex(textureSize[0], coords as vec3); if (ndx >= 0) { // # Issues with corners of cubemaps @@ -783,7 +1613,16 @@ export function softwareTextureReadMipLevel( // I'm not sure what "average the values of the three available samples" // means. To me that would be (a+b+c)/3 or in other words, set all the // weights to 0.33333 but that's not what the M1 is doing. - unreachable('corners of cubemaps are not testable'); + // + // We could check that, given the 3 texels at the corner, if all 3 texels + // are the same value then the result must be the same value. Otherwise, + // the result must be between the 3 values. For now, the code that + // chooses test coordinates avoids corners. This has the restriction + // that the smallest mip level be at least 4x4 so there are some non + // corners to choose from. + unreachable( + `corners of cubemaps are not testable:\n ${describeTextureCall(call)}` + ); } } else { const p = [p0, p1]; @@ -813,16 +1652,33 @@ export function softwareTextureReadMipLevel( unreachable(); } + if (isBuiltinGather(call.builtin)) { + const componentNdx = call.component ?? 0; + assert(componentNdx >= 0 && componentNdx < 4); + assert(samples.length === 4); + const component = kRGBAComponents[componentNdx]; + const out: PerTexelComponent = {}; + samples.forEach((sample, i) => { + const c = isCube + ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) + : applyAddressModesToCoords(addressMode, textureSize, sample.at); + const v = load(c); + const postV = applyCompare(call, sampler, rep.componentOrder, v); + const rgba = convertPerTexelComponentToResultFormat(postV, format); + out[kRGBAComponents[i]] = rgba[component]; + }); + return out; + } + const out: PerTexelComponent = {}; - const ss = []; for (const sample of samples) { const c = isCube ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) : applyAddressModesToCoords(addressMode, textureSize, sample.at); const v = load(c); - ss.push(v); + const postV = applyCompare(call, sampler, rep.componentOrder, v); for (const component of rep.componentOrder) { - out[component] = (out[component] ?? 0) + v[component]! * sample.weight; + out[component] = (out[component] ?? 0) + postV[component]! * sample.weight; } } @@ -842,8 +1698,9 @@ export function softwareTextureReadMipLevel( /** * Reads a texture, optionally sampling between 2 mipLevels */ -export function softwareTextureReadLevel( +function softwareTextureReadLevel( t: GPUTest, + stage: ShaderStage, call: TextureCall, texture: Texture, sampler: GPUSamplerDescriptor | undefined, @@ -856,14 +1713,17 @@ export function softwareTextureReadLevel( return softwareTextureReadMipLevel(call, texture, sampler, mipLevel); } - switch (sampler.mipmapFilter) { + const effectiveMipmapFilter = isBuiltinGather(call.builtin) ? 
'nearest' : sampler.mipmapFilter; + switch (effectiveMipmapFilter) { case 'linear': { const clampedMipLevel = clamp(mipLevel, { min: 0, max: maxLevel }); const baseMipLevel = Math.floor(clampedMipLevel); const nextMipLevel = Math.ceil(clampedMipLevel); const t0 = softwareTextureReadMipLevel(call, texture, sampler, baseMipLevel); const t1 = softwareTextureReadMipLevel(call, texture, sampler, nextMipLevel); - const mix = getWeightForMipLevel(t, mipLevelCount, mipLevel); + const weightType = call.builtin === 'textureSampleLevel' ? 'sampleLevelWeights' : 'identity'; + const mix = getWeightForMipLevel(t, stage, weightType, mipLevelCount, clampedMipLevel); + assert(mix >= 0 && mix <= 1); const values = [ { v: t0, weight: 1 - mix }, { v: t1, weight: mix }, @@ -885,45 +1745,153 @@ export function softwareTextureReadLevel( } } +function computeMipLevelFromGradients( + ddx: readonly number[], + ddy: readonly number[], + size: GPUExtent3D +) { + const texSize = reifyExtent3D(size); + const textureSize = [texSize.width, texSize.height, texSize.depthOrArrayLayers]; + + // Compute the mip level the same way textureSampleGrad does according to the spec. + const scaledDdx = ddx.map((v, i) => v * textureSize[i]); + const scaledDdy = ddy.map((v, i) => v * textureSize[i]); + const dotDDX = dotProduct(scaledDdx, scaledDdx); + const dotDDY = dotProduct(scaledDdy, scaledDdy); + const deltaMax = Math.max(dotDDX, dotDDY); + const mipLevel = 0.5 * Math.log2(deltaMax); + return mipLevel; +} + +function computeMipLevelFromGradientsForCall( + call: TextureCall, + size: GPUExtent3D +) { + assert(!!call.ddx); + assert(!!call.ddy); + // ddx and ddy are the values that would be passed to textureSampleGrad + // If we're emulating textureSample then they're the computed derivatives + // such that if we passed them to textureSampleGrad they'd produce the + // same result. + const ddx: readonly number[] = typeof call.ddx === 'number' ? [call.ddx] : call.ddx; + const ddy: readonly number[] = typeof call.ddy === 'number' ? [call.ddy] : call.ddy; + + return computeMipLevelFromGradients(ddx, ddy, size); +} + /** - * The software version of a texture builtin (eg: textureSample) - * Note that this is not a complete implementation. Rather it's only - * what's needed to generate the correct expected value for the tests. + * The software version of textureSampleGrad except with optional level. */ -export function softwareTextureRead( +function softwareTextureReadGrad( t: GPUTest, + stage: ShaderStage, call: TextureCall, texture: Texture, - sampler: GPUSamplerDescriptor + sampler?: GPUSamplerDescriptor ): PerTexelComponent { - assert(call.ddx !== undefined); - assert(call.ddy !== undefined); + const bias = call.bias === undefined ? 0 : clamp(call.bias, { min: -16.0, max: 15.99 }); + if (call.ddx) { + const mipLevel = computeMipLevelFromGradientsForCall(call, texture.descriptor.size); + const mipLevelCount = texture.descriptor.mipLevelCount ?? 1; + const clampedMipLevel = clamp(mipLevel + bias, { min: 0, max: mipLevelCount - 1 }); + const weightMipLevel = mapSoftwareMipLevelToGPUMipLevel(t, stage, clampedMipLevel); + return softwareTextureReadLevel(t, stage, call, texture, sampler, weightMipLevel); + } else { + return softwareTextureReadLevel(t, stage, call, texture, sampler, (call.mipLevel ?? 
0) + bias); + } +} + +/** + * This must match the code in doTextureCalls for derivativeBase + * + * Note: normal implicit derivatives are computed like this + * + * fn textureSample(T, S, coord) -> vec4f { + * return textureSampleGrad(T, S, dpdx(coord), dpdy(coord)); + * } + * + * dpdx and dpdy are effectively computed by, + * getting the values of coord for 2x2 adjacent texels. + * + * p0 = coord value at x, y + * p1 = coord value at x + 1, y + * p2 = coord value at x, y + 1 + * p3 = coord value at x + 1, y + 1 + * + * dpdx is the average delta in x and dpdy is the average delta in y + * + * dpdx = (p1 - p0 + p3 - p2) / 2 // average of horizontal change + * dpdy = (p2 - p0 + p3 - p1) / 2 // average of vertical change + * + * derivativeBase is + * + * '1d' '2d' '3d' + * p0 = [0] [0, 0] [0, 0, 0] + * p1 = [1] [1, 0] [1, 0, 0] + * p2 = [0] [0, 1] [0, 1, 0] + * p3 = [1] [1, 1] [1, 1, 0] + * + * But, these values are normalized texels coords so if the src texture + * is 8x8 these would be * 0.125 + * + * Note: to test other derivatives we add in a multiplier but, + * this base gives us something to add that starts at 0,0 at the call + * but who's derivatives we can easily set. We need the default + * derivativeBase to be 1 otherwise it's 0 which makes the computed mip level + * be -Infinity which means bias in `textureSampleBias` has no meaning. + */ +function derivativeBaseForCall(texture: Texture, isDDX: boolean) { const texSize = reifyExtent3D(texture.descriptor.size); - const textureSize = [texSize.width, texSize.height]; + const textureSize = [texSize.width, texSize.height, texSize.depthOrArrayLayers]; + if (isCubeViewDimension(texture.viewDescriptor)) { + return (isDDX ? [1 / textureSize[0], 0, 1] : [0, 1 / textureSize[1], 1]) as T; + } else if (texture.descriptor.dimension === '3d') { + return (isDDX ? [1 / textureSize[0], 0, 0] : [0, 1 / textureSize[1], 0]) as T; + } else if (texture.descriptor.dimension === '1d') { + return [1 / textureSize[0]] as T; + } else { + return (isDDX ? [1 / textureSize[0], 0] : [0, 1 / textureSize[1]]) as T; + } +} - // ddx and ddy are the values that would be passed to textureSampleGrad - // If we're emulating textureSample then they're the computed derivatives - // such that if we passed them to textureSampleGrad they'd produce the - // same result. - const ddx: readonly number[] = typeof call.ddx === 'number' ? [call.ddx] : call.ddx; - const ddy: readonly number[] = typeof call.ddy === 'number' ? [call.ddy] : call.ddy; +/** + * Multiplies derivativeBase by derivativeMult or 1 + */ +function derivativeForCall( + texture: Texture, + call: TextureCall, + isDDX: boolean +) { + const dd = derivativeBaseForCall(texture, isDDX); + return dd.map((v, i) => v * (call.derivativeMult?.[i] ?? 1)) as T; +} - // Compute the mip level the same way textureSampleGrad does - const scaledDdx = ddx.map((v, i) => v * textureSize[i]); - const scaledDdy = ddy.map((v, i) => v * textureSize[i]); - const dotDDX = dotProduct(scaledDdx, scaledDdx); - const dotDDY = dotProduct(scaledDdy, scaledDdy); - const deltaMax = Math.max(dotDDX, dotDDY); - // MAINTENANCE_TODO: handle texture view baseMipLevel and mipLevelCount? 
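A small worked example of the scheme described above, reusing the formula from `computeMipLevelFromGradients` and assuming an 8x8 2d texture with the derivativeBase shown (ddx = [1/8, 0], ddy = [0, 1/8]):

// mipLevel = 0.5 * log2(max(dot(ddx * size), dot(ddy * size))), per the WGSL spec.
const size = [8, 8];
const base = { ddx: [1 / 8, 0], ddy: [0, 1 / 8] }; // derivativeBase for a 2d texture

function mipLevelFor(derivativeMult: number): number {
  const scale = (v: number[]) => v.map((c, i) => c * derivativeMult * size[i]);
  const dot = (a: number[]) => a.reduce((sum, c) => sum + c * c, 0);
  const deltaMax = Math.max(dot(scale(base.ddx)), dot(scale(base.ddy)));
  return 0.5 * Math.log2(deltaMax);
}

mipLevelFor(1); // 0 -> the default derivativeMult of 1 samples the base mip level
mipLevelFor(2); // 1 -> doubling the derivatives selects the next smaller mip level
mipLevelFor(4); // 2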
- const mipLevel = 0.5 * Math.log2(deltaMax); - return softwareTextureReadLevel(t, call, texture, sampler, mipLevel); +function softwareTextureRead( + t: GPUTest, + stage: ShaderStage, + call: TextureCall, + texture: Texture, + sampler?: GPUSamplerDescriptor +): PerTexelComponent { + // add the implicit derivatives that we use from WGSL in doTextureCalls + if (builtinNeedsDerivatives(call.builtin) && !call.ddx) { + const newCall: TextureCall = { + ...call, + ddx: call.ddx ?? derivativeForCall(texture, call, true), + ddy: call.ddy ?? derivativeForCall(texture, call, false), + }; + call = newCall; + } + return softwareTextureReadGrad(t, stage, call, texture, sampler); } -export type TextureTestOptions = { +export type TextureTestOptions = { ddx?: number; // the derivative we want at sample time ddy?: number; - uvwStart?: readonly [number, number]; // the starting uv value (these are used make the coordinates negative as it uncovered issues on some hardware) - offset?: readonly [number, number]; // a constant offset + uvwStart?: Readonly; // the starting uv value (these are used make the coordinates negative as it uncovered issues on some hardware) + offset?: Readonly; // a constant offset + depthTexture?: boolean; + arrayIndexType?: 'i' | 'u'; }; /** @@ -1090,6 +2058,50 @@ function texelsApproximatelyEqual( return true; } +// If it's `textureGather` then we need to convert all values to one component. +// In other words, imagine the format is rg11b10ufloat. If it was +// `textureSample` we'd have `r11, g11, b10, a=1` but for `textureGather` +// +// component = 0 => `r11, r11, r11, r11` +// component = 1 => `g11, g11, g11, g11` +// component = 2 => `b10, b10, b10, b10` +// +// etc..., each from a different texel +// +// The Texel utils don't handle this. So if `component = 2` we take each value, +// copy it to the `B` component, run it through the texel utils so it returns +// the correct ULP for a 10bit float (not an 11 bit float). Then copy it back to +// the channel it came from. +function getULPFromZeroForComponents( + rgba: PerTexelComponent, + format: EncodableTextureFormat, + builtin: TextureBuiltin, + componentNdx?: number +): PerTexelComponent { + const rep = kTexelRepresentationInfo[format]; + if (isBuiltinGather(builtin)) { + const out: PerTexelComponent = {}; + const component = kRGBAComponents[componentNdx ?? 0]; + const temp: PerTexelComponent = { R: 0, G: 0, B: 0, A: 1 }; + for (const comp of kRGBAComponents) { + temp[component] = rgba[comp]; + const texel = convertResultFormatToTexelViewFormat(temp, format); + const ulp = convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + out[comp] = ulp[component]; + } + return out; + } else { + const texel = convertResultFormatToTexelViewFormat(rgba, format); + return convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + } +} + /** * Checks the result of each call matches the expected result. */ @@ -1099,11 +2111,24 @@ export async function checkCallResults( textureType: string, sampler: GPUSamplerDescriptor | undefined, calls: TextureCall[], - results: PerTexelComponent[] + results: Awaited>>, + shortShaderStage: ShortShaderStage, + gpuTexture?: GPUTexture ) { + const stage = kShortShaderStageToShaderStage[shortShaderStage]; + await initMipGradientValuesForDevice(t, stage); + + let haveComparisonCheckInfo = false; + let checkInfo = { + runner: results.runner, + calls, + sampler, + }; + // These are only read if the tests fail. 
They are used to get the values from the + // GPU texture for displaying in diagnostics. + let gpuTexels: TexelView[] | undefined; const errs: string[] = []; const format = texture.texels[0].format; - const rep = kTexelRepresentationInfo[format]; const size = reifyExtent3D(texture.descriptor.size); const maxFractionalDiff = sampler?.minFilter === 'linear' || @@ -1112,10 +2137,20 @@ export async function checkCallResults( ? getMaxFractionalDiffForTextureFormat(texture.descriptor.format) : 0; - for (let callIdx = 0; callIdx < calls.length && errs.length === 0; callIdx++) { + for (let callIdx = 0; callIdx < calls.length; callIdx++) { const call = calls[callIdx]; - const gotRGBA = results[callIdx]; - const expectRGBA = softwareTextureReadLevel(t, call, texture, sampler, call.mipLevel ?? 0); + const gotRGBA = results.results[callIdx]; + const expectRGBA = softwareTextureRead(t, stage, call, texture, sampler); + + // The spec says depth and stencil have implementation defined values for G, B, and A + // so if this is `textureGather` and component > 0 then there's nothing to check. + if ( + isDepthOrStencilTextureFormat(format) && + isBuiltinGather(call.builtin) && + call.component! > 0 + ) { + continue; + } if (texelsApproximatelyEqual(gotRGBA, expectRGBA, format, maxFractionalDiff)) { continue; @@ -1125,237 +2160,235 @@ export async function checkCallResults( continue; } - const got = convertResultFormatToTexelViewFormat(gotRGBA, format); - const expect = convertResultFormatToTexelViewFormat(expectRGBA, format); - const gULP = rep.bitsToULPFromZero(rep.numberToBits(got)); - const eULP = rep.bitsToULPFromZero(rep.numberToBits(expect)); - for (const component of rep.componentOrder) { - const g = got[component]!; - const e = expect[component]!; + const gULP = getULPFromZeroForComponents(gotRGBA, format, call.builtin, call.component); + const eULP = getULPFromZeroForComponents(expectRGBA, format, call.builtin, call.component); + + // from the spec: https://gpuweb.github.io/gpuweb/#reading-depth-stencil + // depth and stencil values are D, ?, ?, ? + const rgbaComponentsToCheck = + isBuiltinGather(call.builtin) || !isDepthOrStencilTextureFormat(format) + ? kRGBAComponents + : kRComponent; + + let bad = false; + const diffs = rgbaComponentsToCheck.map(component => { + const g = gotRGBA[component]!; + const e = expectRGBA[component]!; const absDiff = Math.abs(g - e); const ulpDiff = Math.abs(gULP[component]! - eULP[component]!); - const relDiff = absDiff / Math.max(Math.abs(g), Math.abs(e)); + assert(!Number.isNaN(ulpDiff)); + const maxAbs = Math.max(Math.abs(g), Math.abs(e)); + const relDiff = maxAbs > 0 ? absDiff / maxAbs : 0; if (ulpDiff > 3 && absDiff > maxFractionalDiff) { - const desc = describeTextureCall(call); - errs.push(`component was not as expected: + bad = true; + } + return { absDiff, relDiff, ulpDiff }; + }); + + const isFloatType = (format: GPUTextureFormat) => { + const info = kTextureFormatInfo[format]; + return info.color?.type === 'float' || info.depth?.type === 'depth'; + }; + const fix5 = (n: number) => (isFloatType(format) ? n.toFixed(5) : n.toString()); + const fix5v = (arr: number[]) => arr.map(v => fix5(v)).join(', '); + const rgbaToArray = (p: PerTexelComponent): number[] => + rgbaComponentsToCheck.map(component => p[component]!); + + if (bad) { + const desc = describeTextureCall(call); + errs.push(`result was not as expected: size: [${size.width}, ${size.height}, ${size.depthOrArrayLayers}] mipCount: ${texture.descriptor.mipLevelCount ?? 
1} - call: ${desc} // #${callIdx} - component: ${component} - got: ${g} - expected: ${e} - abs diff: ${absDiff.toFixed(4)} - rel diff: ${(relDiff * 100).toFixed(2)}% - ulp diff: ${ulpDiff} + call: ${desc} // #${callIdx}`); + if (isCubeViewDimension(texture.viewDescriptor)) { + const coord = convertCubeCoordToNormalized3DTextureCoord(call.coords as vec3); + const faceNdx = Math.floor(coord[2] * 6); + errs.push(` : as 3D texture coord: (${coord[0]}, ${coord[1]}, ${coord[2]})`); + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? '2d', + texture.descriptor.size, + mipLevel + ); + const t = coord.slice(0, 2).map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push( + ` : as texel coord mip level[${mipLevel}]: (${t[0]}, ${t[1]}), face: ${faceNdx}(${kFaceNames[faceNdx]})` + ); + } + } else { + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? '2d', + texture.descriptor.size, + mipLevel + ); + const t = call.coords!.map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push(` : as texel coord @ mip level[${mipLevel}]: (${t.join(', ')})`); + } + } + if (builtinNeedsDerivatives(call.builtin)) { + const ddx = derivativeForCall(texture, call, true); + const ddy = derivativeForCall(texture, call, false); + const mipLevel = computeMipLevelFromGradients(ddx, ddy, size); + const biasStr = call.bias === undefined ? '' : ' (without bias)'; + errs.push(`implicit derivative based mip level: ${fix5(mipLevel)}${biasStr}`); + if (call.bias) { + const clampedBias = clamp(call.bias ?? 0, { min: -16.0, max: 15.99 }); + errs.push(`\ + clamped bias: ${fix5(clampedBias)} + mip level with bias: ${fix5(mipLevel + clampedBias)}`); + } + } else if (call.ddx) { + const mipLevel = computeMipLevelFromGradientsForCall(call, size); + errs.push(`gradient based mip level: ${mipLevel}`); + } + errs.push(`\ + got: ${fix5v(rgbaToArray(gotRGBA))} + expected: ${fix5v(rgbaToArray(expectRGBA))} + max diff: ${maxFractionalDiff} + abs diffs: ${fix5v(diffs.map(({ absDiff }) => absDiff))} + rel diffs: ${diffs.map(({ relDiff }) => `${(relDiff * 100).toFixed(2)}%`).join(', ')} + ulp diffs: ${diffs.map(({ ulpDiff }) => ulpDiff).join(', ')} `); - if (sampler) { + + if (sampler) { + if (t.rec.debugging) { + // For compares, we can't use the builtin (textureXXXCompareXXX) because it only + // returns 0 or 1 or the average of 0 and 1 for multiple samples. And, for example, + // if the comparison is `always` then every sample returns 1. 
So we need to use the + // corresponding sample function to get the actual values from the textures + // + // textureSampleCompare -> textureSample + // textureSampleCompareLevel -> textureSampleLevel + // textureGatherCompare -> textureGather + if (isBuiltinComparison(call.builtin)) { + if (!haveComparisonCheckInfo) { + // Convert the comparison calls to their corresponding non-comparison call + const debugCalls = calls.map(call => { + const debugCall = { ...call }; + debugCall.depthRef = undefined; + switch (call.builtin) { + case 'textureGatherCompare': + debugCall.builtin = 'textureGather'; + break; + case 'textureSampleCompare': + debugCall.builtin = 'textureSample'; + break; + case 'textureSampleCompareLevel': + debugCall.builtin = 'textureSampleLevel'; + debugCall.levelType = 'f'; + debugCall.mipLevel = 0; + break; + default: + unreachable(); + } + return debugCall; + }); + + // Convert the comparison sampler to a non-comparison sampler + const debugSampler = { ...sampler }; + delete debugSampler.compare; + + // Make a runner for these changed calls. + const debugRunner = createTextureCallsRunner( + t, + { + format, + dimension: texture.descriptor.dimension ?? '2d', + sampleCount: texture.descriptor.sampleCount ?? 1, + depthOrArrayLayers: size.depthOrArrayLayers, + }, + texture.viewDescriptor, + textureType, + debugSampler, + debugCalls, + stage + ); + checkInfo = { + runner: debugRunner, + sampler: debugSampler, + calls: debugCalls, + }; + haveComparisonCheckInfo = true; + } + } + + if (!gpuTexels && gpuTexture) { + // Read the texture back if we haven't yet. We'll use this + // to get values for each sample point. + gpuTexels = await readTextureToTexelViews( + t, + gpuTexture, + texture.descriptor, + getTexelViewFormatForTextureFormat(gpuTexture.format) + ); + } + + const callForSamplePoints = checkInfo.calls[callIdx]; + const expectedSamplePoints = [ 'expected:', - ...(await identifySamplePoints(texture, (texels: TexelView[]) => { - return Promise.resolve( - softwareTextureReadLevel( - t, - call, - { - texels, - descriptor: texture.descriptor, - viewDescriptor: texture.viewDescriptor, - }, - sampler, - call.mipLevel ?? 0 - ) - ); - })), + ...(await identifySamplePoints( + texture, + sampler, + callForSamplePoints, + call, + texture.texels, + (texels: TexelView[]) => { + return Promise.resolve( + softwareTextureRead( + t, + stage, + callForSamplePoints, + { + texels, + descriptor: texture.descriptor, + viewDescriptor: texture.viewDescriptor, + }, + checkInfo.sampler + ) + ); + } + )), ]; const gotSamplePoints = [ 'got:', - ...(await identifySamplePoints(texture, async (texels: TexelView[]) => { - const gpuTexture = createTextureFromTexelViews(t, texels, texture.descriptor); - const result = ( - await doTextureCalls(t, gpuTexture, texture.viewDescriptor, textureType, sampler, [ - call, - ]) - )[0]; - gpuTexture.destroy(); - return result; - })), + ...(await identifySamplePoints( + texture, + sampler, + callForSamplePoints, + call, + gpuTexels, + async (texels: TexelView[]) => { + const gpuTexture = createTextureFromTexelViewsLocal(t, texels, texture.descriptor); + const result = (await checkInfo.runner.run(gpuTexture))[callIdx]; + gpuTexture.destroy(); + return result; + } + )), ]; errs.push(' sample points:'); errs.push(layoutTwoColumns(expectedSamplePoints, gotSamplePoints).join('\n')); errs.push('', ''); } - } - } - } - - return errs.length > 0 ? 
new Error(errs.join('\n')) : undefined; -} - -/** - * "Renders a quad" to a TexelView with the given parameters, - * sampling from the given Texture. - */ -export function softwareRasterize( - t: GPUTest, - texture: Texture, - sampler: GPUSamplerDescriptor, - targetSize: [number, number], - options: TextureTestOptions -) { - const [width, height] = targetSize; - const { ddx = 1, ddy = 1, uvwStart = [0, 0] } = options; - const format = 'rgba32float'; - - const textureSize = reifyExtent3D(texture.descriptor.size); - - // MAINTENANCE_TODO: Consider passing these in as a similar computation - // happens in putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer. - // The issue is there, the calculation is "what do we need to multiply the unitQuad - // by to get the derivatives we want". The calculation here is "what coordinate - // will we get for a given frag coordinate". It turns out to be the same calculation - // but needs rephrasing them so they are more obviously the same would help - // consolidate them into one calculation. - const screenSpaceUMult = (ddx * width) / textureSize.width; - const screenSpaceVMult = (ddy * height) / textureSize.height; - - const rep = kTexelRepresentationInfo[format]; - - const expData = new Float32Array(width * height * 4); - for (let y = 0; y < height; ++y) { - const fragY = height - y - 1 + 0.5; - for (let x = 0; x < width; ++x) { - const fragX = x + 0.5; - // This code calculates the same value that will be passed to - // `textureSample` in the fragment shader for a given frag coord (see the - // WGSL code which uses the same formula, but using interpolation). That - // shader renders a clip space quad and includes a inter-stage "uv" - // coordinates that start with a unit quad (0,0) to (1,1) and is - // multiplied by ddx,ddy and as added in uStart and vStart - // - // uv = unitQuad * vec2(ddx, ddy) + vec2(vStart, uStart); - // - // softwareTextureRead simulates a single call to `textureSample` so - // here we're computing the `uv` value that will be passed for a - // particular fragment coordinate. fragX / width, fragY / height provides - // the unitQuad value. - // - // ddx and ddy in this case are the derivative values we want to test. We - // pass those into the softwareTextureRead as they would normally be - // derived from the change in coord. - const coords = [ - (fragX / width) * screenSpaceUMult + uvwStart[0], - (fragY / height) * screenSpaceVMult + uvwStart[1], - ] as T; - const call: TextureCall = { - builtin: 'textureSample', - coordType: 'f', - coords, - ddx: [ddx / textureSize.width, 0] as T, - ddy: [0, ddy / textureSize.height] as T, - offset: options.offset as T, - }; - const sample = softwareTextureRead(t, call, texture, sampler); - const rgba = { R: 0, G: 0, B: 0, A: 1, ...sample }; - const asRgba32Float = new Float32Array(rep.pack(rgba)); - expData.set(asRgba32Float, (y * width + x) * 4); - } - } - - return TexelView.fromTextureDataByReference(format, new Uint8Array(expData.buffer), { - bytesPerRow: width * 4 * 4, - rowsPerImage: height, - subrectOrigin: [0, 0, 0], - subrectSize: targetSize, - }); -} - -/** - * Render textured quad to an rgba32float texture. 
- */ -export function drawTexture( - t: GPUTest & TextureTestMixinType, - texture: GPUTexture, - samplerDesc: GPUSamplerDescriptor, - options: TextureTestOptions -) { - const device = t.device; - const { ddx = 1, ddy = 1, uvwStart = [0, 0, 0], offset } = options; - - const format = 'rgba32float'; - const renderTarget = t.createTextureTracked({ - format, - size: [32, 32], - usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.RENDER_ATTACHMENT, - }); - - // Compute the amount we need to multiply the unitQuad by get the - // derivatives we want. - const uMult = (ddx * renderTarget.width) / texture.width; - const vMult = (ddy * renderTarget.height) / texture.height; - - const offsetWGSL = offset ? `, vec2i(${offset[0]},${offset[1]})` : ''; - - const code = ` -struct InOut { - @builtin(position) pos: vec4f, - @location(0) uv: vec2f, -}; - -@vertex fn vs(@builtin(vertex_index) vertex_index : u32) -> InOut { - let positions = array( - vec2f(-1, 1), vec2f( 1, 1), - vec2f(-1, -1), vec2f( 1, -1), - ); - let pos = positions[vertex_index]; - return InOut( - vec4f(pos, 0, 1), - (pos * 0.5 + 0.5) * vec2f(${uMult}, ${vMult}) + vec2f(${uvwStart[0]}, ${uvwStart[1]}), - ); -} - -@group(0) @binding(0) var T : texture_2d; -@group(0) @binding(1) var S : sampler; - -@fragment fn fs(v: InOut) -> @location(0) vec4f { - return textureSample(T, S, v.uv${offsetWGSL}); -} -`; - - const shaderModule = device.createShaderModule({ code }); - const pipeline = device.createRenderPipeline({ - layout: 'auto', - vertex: { module: shaderModule }, - fragment: { - module: shaderModule, - targets: [{ format }], - }, - primitive: { topology: 'triangle-strip' }, - }); - - const sampler = device.createSampler(samplerDesc); - - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: texture.createView() }, - { binding: 1, resource: sampler }, - ], - }); - - const encoder = device.createCommandEncoder(); + // this is not an else because it's common to comment out the previous `if` for running on a CQ. + if (!t.rec.debugging) { + errs.push('### turn on debugging to see sample points ###'); + } + } // if (sampler) - const renderPass = encoder.beginRenderPass({ - colorAttachments: [{ view: renderTarget.createView(), loadOp: 'clear', storeOp: 'store' }], - }); + // Don't report the other errors. There 50 sample points per subcase and + // 50-100 subcases so the log would get enormous if all 50 fail. One + // report per subcase is enough. + break; + } // if (bad) + } // for cellNdx - renderPass.setPipeline(pipeline); - renderPass.setBindGroup(0, bindGroup); - renderPass.draw(4); - renderPass.end(); - device.queue.submit([encoder.finish()]); + results.runner.destroy(); + checkInfo.runner.destroy(); - return renderTarget; + return errs.length > 0 ? new Error(errs.join('\n')) : undefined; } function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { @@ -1399,11 +2432,11 @@ function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { // tolerances if possible. 
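// (Illustrative note on how these values are used: in the result check above,
//  a component only fails when it is off by more than 3 ULPs AND by more than
//  this maxFractionalDiff. For example, for an 8unorm format the value below
//  is 7 / 255 ≈ 0.027, so a result within roughly 2.7% of the expectation is
//  accepted even if it is many ULPs away from it.)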
if (format.includes('depth')) { - return 3 / 65536; + return 3 / 100; } else if (format.includes('8unorm')) { return 7 / 255; } else if (format.includes('2unorm')) { - return 9 / 512; + return 13 / 512; } else if (format.includes('unorm')) { return 7 / 255; } else if (format.includes('8snorm')) { @@ -1420,49 +2453,6 @@ function getMaxFractionalDiffForTextureFormat(format: GPUTextureFormat) { } } -export function checkTextureMatchesExpectedTexelView( - t: GPUTest & TextureTestMixinType, - format: GPUTextureFormat, - actualTexture: GPUTexture, - expectedTexelView: TexelView -) { - const maxFractionalDiff = getMaxFractionalDiffForTextureFormat(format); - t.expectTexelViewComparisonIsOkInTexture( - { texture: actualTexture }, - expectedTexelView, - [actualTexture.width, actualTexture.height], - { maxFractionalDiff } - ); -} - -/** - * Puts data in a texture. Renders a quad to a rgba32float. Then "software renders" - * to a TexelView the expected result and compares the rendered texture to the - * expected TexelView. - */ -export async function putDataInTextureThenDrawAndCheckResultsComparedToSoftwareRasterizer< - T extends Dimensionality, ->( - t: GPUTest & TextureTestMixinType, - descriptor: GPUTextureDescriptor, - viewDescriptor: GPUTextureViewDescriptor, - samplerDesc: GPUSamplerDescriptor, - options: TextureTestOptions -) { - const { texture, texels } = await createTextureWithRandomDataAndGetTexels(t, descriptor); - - const actualTexture = drawTexture(t, texture, samplerDesc, options); - const expectedTexelView = softwareRasterize( - t, - { descriptor, texels, viewDescriptor }, - samplerDesc, - [actualTexture.width, actualTexture.height], - options - ); - - checkTextureMatchesExpectedTexelView(t, texture.format, actualTexture, expectedTexelView); -} - const sumOfCharCodesOfString = (s: unknown) => String(s) .split('') @@ -1533,7 +2523,7 @@ function getBlockFiller(format: GPUTextureFormat) { /** * Fills a texture with random data. */ -export function fillTextureWithRandomData(device: GPUDevice, texture: GPUTexture) { +function fillTextureWithRandomData(device: GPUDevice, texture: GPUTexture) { assert(!isCompressedFloatTextureFormat(texture.format)); const info = kTextureFormatInfo[texture.format]; const hashBase = @@ -1572,7 +2562,7 @@ const s_readTextureToRGBA32DeviceToPipeline = new WeakMap< // MAINTENANCE_TODO: remove cast once textureBindingViewDimension is added to IDL function getEffectiveViewDimension( t: GPUTest, - descriptor: GPUTextureDescriptor + descriptor: Omit ): GPUTextureViewDimension { const { textureBindingViewDimension } = descriptor as unknown as { textureBindingViewDimension?: GPUTextureViewDimension; @@ -1588,7 +2578,7 @@ function getEffectiveViewDimension( export async function readTextureToTexelViews( t: GPUTest, texture: GPUTexture, - descriptor: GPUTextureDescriptor, + descriptor: Omit, format: EncodableTextureFormat ) { const device = t.device; @@ -1763,6 +2753,19 @@ export async function readTextureToTexelViews( return texelViews; } +function createTextureFromTexelViewsLocal( + t: GPUTest, + texelViews: TexelView[], + desc: Omit +): GPUTexture { + const modifiedDescriptor = { ...desc }; + // If it's a depth or stencil texture we need to render to it to fill it with data. 
+ if (isDepthOrStencilTextureFormat(texelViews[0].format)) { + modifiedDescriptor.usage = desc.usage | GPUTextureUsage.RENDER_ATTACHMENT; + } + return createTextureFromTexelViews(t, texelViews, modifiedDescriptor); +} + /** * Fills a texture with random data and returns that data as * an array of TexelView. @@ -1776,9 +2779,11 @@ export async function readTextureToTexelViews( */ export async function createTextureWithRandomDataAndGetTexels( t: GPUTest, - descriptor: GPUTextureDescriptor + descriptor: GPUTextureDescriptor, + options?: RandomTextureOptions ) { if (isCompressedTextureFormat(descriptor.format)) { + assert(!options, 'options not supported for compressed textures'); const texture = t.createTextureTracked(descriptor); fillTextureWithRandomData(t.device, texture); @@ -1790,15 +2795,15 @@ export async function createTextureWithRandomDataAndGetTexels( ); return { texture, texels }; } else { - const texels = createRandomTexelViewMipmap(descriptor); - const texture = createTextureFromTexelViews(t, texels, descriptor); + const texels = createRandomTexelViewMipmap(descriptor, options); + const texture = createTextureFromTexelViewsLocal(t, texels, descriptor); return { texture, texels }; } } function valueIfAllComponentsAreEqual( c: PerTexelComponent, - componentOrder: TexelComponent[] + componentOrder: readonly TexelComponent[] ) { const s = new Set(componentOrder.map(component => c[component]!)); return s.size === 1 ? s.values().next().value : undefined; @@ -1873,32 +2878,36 @@ const kFaceNames = ['+x', '-x', '+y', '-y', '+z', '-z'] as const; * Example: * * 0 1 2 3 4 5 6 7 - * ┌───┬───┬───┬───┬───┬───┬───┬───┐ - * 0 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 1 │ │ │ │ │ │ │ │ a │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 2 │ │ │ │ │ │ │ │ b │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 3 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 4 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 5 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 6 │ │ │ │ │ │ │ │ │ - * ├───┼───┼───┼───┼───┼───┼───┼───┤ - * 7 │ │ │ │ │ │ │ │ │ - * └───┴───┴───┴───┴───┴───┴───┴───┘ + * +---+---+---+---+---+---+---+---+ + * 0 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 1 | | | | | | | | a | + * +---+---+---+---+---+---+---+---+ + * 2 | | | | | | | | b | + * +---+---+---+---+---+---+---+---+ + * 3 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 4 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 5 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 6 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * 7 | | | | | | | | | + * +---+---+---+---+---+---+---+---+ * a: at: [7, 1], weights: [R: 0.75000] * b: at: [7, 2], weights: [R: 0.25000] */ -async function identifySamplePoints( +async function identifySamplePoints( texture: Texture, + sampler: GPUSamplerDescriptor, + callForSamples: TextureCall, + originalCall: TextureCall, + texels: TexelView[] | undefined, run: (texels: TexelView[]) => Promise> ) { const info = texture.descriptor; - const isCube = texture.viewDescriptor.dimension === 'cube'; + const isCube = isCubeViewDimension(texture.viewDescriptor); const mipLevelCount = texture.descriptor.mipLevelCount ?? 1; const mipLevelSize = range(mipLevelCount, mipLevel => virtualMipSize(texture.descriptor.dimension ?? 
'2d', texture.descriptor.size, mipLevel) @@ -1914,6 +2923,27 @@ async function identifySamplePoints( })(); const numTexels = numTexelsPerLevel.reduce((sum, v) => sum + v); + const getMipLevelFromTexelId = (texelId: number) => { + for (let mipLevel = mipLevelCount - 1; mipLevel > 0; --mipLevel) { + if (texelId - numTexelsOfPrecedingLevels[mipLevel] >= 0) { + return mipLevel; + } + } + return 0; + }; + + const getTexelCoordFromTexelId = (texelId: number) => { + const mipLevel = getMipLevelFromTexelId(texelId); + const size = mipLevelSize[mipLevel]; + const texelsPerSlice = size[0] * size[1]; + const id = texelId - numTexelsOfPrecedingLevels[mipLevel]; + const layer = Math.floor(id / texelsPerSlice); + const xyId = id - layer * texelsPerSlice; + const y = (xyId / size[0]) | 0; + const x = xyId % size[0]; + return { x, y, z: layer, mipLevel, xyId }; + }; + // This isn't perfect. We already know there was an error. We're just // generating info so it seems okay it's not perfect. This format will // be used to generate weights by drawing with a texture of this format @@ -1934,6 +2964,11 @@ async function identifySamplePoints( ) as EncodableTextureFormat; const rep = kTexelRepresentationInfo[format]; + const components = isBuiltinGather(callForSamples.builtin) ? kRGBAComponents : rep.componentOrder; + const convertResultAsAppropriate = isBuiltinGather(callForSamples.builtin) + ? (v: T) => v + : convertResultFormatToTexelViewFormat; + // Identify all the texels that are sampled, and their weights. const sampledTexelWeights = new Map>(); const unclassifiedStack = [new Set(range(numTexels, v => v))]; @@ -1951,8 +2986,8 @@ async function identifySamplePoints( unclassifiedStack.push(setB); } - // See if any of the texels in setA were sampled. - const results = convertResultFormatToTexelViewFormat( + // See if any of the texels in setA were sampled.0 + const results = convertResultAsAppropriate( await run( range(mipLevelCount, mipLevel => TexelView.fromTexelsAsColors( @@ -1978,7 +3013,7 @@ async function identifySamplePoints( ), format ); - if (rep.componentOrder.some(c => results[c] !== 0)) { + if (components.some(c => results[c] !== 0)) { // One or more texels of setA were sampled. if (setA.size === 1) { // We identified a specific texel was sampled. @@ -1991,40 +3026,26 @@ async function identifySamplePoints( } } - const getMipLevelFromTexelId = (texelId: number) => { - for (let mipLevel = mipLevelCount - 1; mipLevel > 0; --mipLevel) { - if (texelId - numTexelsOfPrecedingLevels[mipLevel] >= 0) { - return mipLevel; - } - } - return 0; - }; - // separate the sampledTexelWeights by mipLevel, then by layer, within a layer the texelId only includes x and y const levels: Map>[][] = []; for (const [texelId, weight] of sampledTexelWeights.entries()) { - const mipLevel = getMipLevelFromTexelId(texelId); + const { xyId, z, mipLevel } = getTexelCoordFromTexelId(texelId); const level = levels[mipLevel] ?? []; levels[mipLevel] = level; - const size = mipLevelSize[mipLevel]; - const texelsPerSlice = size[0] * size[1]; - const id = texelId - numTexelsOfPrecedingLevels[mipLevel]; - const layer = Math.floor(id / texelsPerSlice); - const layerEntries = level[layer] ?? new Map(); - level[layer] = layerEntries; - const xyId = id - layer * texelsPerSlice; + const layerEntries = level[z] ?? 
new Map(); + level[z] = layerEntries; layerEntries.set(xyId, weight); } - // ┌───┬───┬───┬───┐ - // │ a │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ │ - // ├───┼───┼───┼───┤ - // │ │ │ │ b │ - // └───┴───┴───┴───┘ + // +---+---+---+---+ + // | a | | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + // | | | | | + // +---+---+---+---+ + // | | | | b | + // +---+---+---+---+ const lines: string[] = []; const letter = (idx: number) => String.fromCodePoint(idx < 30 ? 97 + idx : idx + 9600 - 30); // 97: 'a' let idCount = 0; @@ -2040,13 +3061,20 @@ async function identifySamplePoints( for (let layer = 0; layer < depthOrArrayLayers; ++layer) { const layerEntries = level[layer]; - if (!layerEntries) { - continue; - } const orderedTexelIndices: number[] = []; lines.push(''); - lines.push(`layer: ${layer}${isCube ? ` (${kFaceNames[layer]})` : ''}`); + const unSampled = layerEntries ? '' : 'un-sampled'; + if (isCube) { + const face = kFaceNames[layer % 6]; + lines.push(`layer: ${layer}, cube-layer: ${(layer / 6) | 0} (${face}) ${unSampled}`); + } else { + lines.push(`layer: ${layer} ${unSampled}`); + } + + if (!layerEntries) { + continue; + } { let line = ' '; @@ -2056,57 +3084,92 @@ async function identifySamplePoints( lines.push(line); } { - let line = ' ┌'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┐' : '───┬'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } for (let y = 0; y < height; y++) { { - let line = `${y.toString().padEnd(2)}│`; + let line = `${y.toString().padEnd(2)}|`; for (let x = 0; x < width; x++) { const texelIdx = x + y * texelsPerRow; const weight = layerEntries.get(texelIdx); if (weight !== undefined) { - line += ` ${letter(idCount + orderedTexelIndices.length)} │`; + line += ` ${letter(idCount + orderedTexelIndices.length)} |`; orderedTexelIndices.push(texelIdx); } else { - line += ' │'; + line += ' |'; } } lines.push(line); } if (y < height - 1) { - let line = ' ├'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┤' : '───┼'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } } { - let line = ' └'; + let line = ' +'; for (let x = 0; x < width; x++) { - line += x === width - 1 ? '───┘' : '───┴'; + line += x === width - 1 ? '---+' : '---+'; } lines.push(line); } const pad2 = (n: number) => n.toString().padStart(2); const fix5 = (n: number) => n.toFixed(5); + const formatTexel = (texel: PerTexelComponent | undefined) => + texel + ? Object.entries(texel) + .map(([k, v]) => `${k}: ${fix5(v)}`) + .join(', ') + : '*texel values unavailable*'; + + const colorLines: string[] = []; + const compareLines: string[] = []; + let levelWeight = 0; orderedTexelIndices.forEach((texelIdx, i) => { const weights = layerEntries.get(texelIdx)!; const y = Math.floor(texelIdx / texelsPerRow); const x = texelIdx % texelsPerRow; - const singleWeight = valueIfAllComponentsAreEqual(weights, rep.componentOrder); + const singleWeight = valueIfAllComponentsAreEqual(weights, components)!; + levelWeight += singleWeight; const w = singleWeight !== undefined ? 
`weight: ${fix5(singleWeight)}` - : `weights: [${rep.componentOrder.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; + : `weights: [${components.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; const coord = `${pad2(x)}, ${pad2(y)}, ${pad2(layer)}`; - lines.push(`${letter(idCount + i)}: mip(${mipLevel}) at: [${coord}], ${w}`); + const texel = + texels && + convertToTexelViewFormat( + texels[mipLevel].color({ x, y, z: layer }), + texture.descriptor.format + ); + + const texelStr = formatTexel(texel); + const id = letter(idCount + i); + lines.push(`${id}: mip(${mipLevel}) at: [${coord}], ${w}`); + colorLines.push(`${id}: value: ${texelStr}`); + if (isBuiltinComparison(originalCall.builtin)) { + assert(!!texel); + const compareTexel = applyCompare(originalCall, sampler, [TexelComponent.Depth], texel); + compareLines.push( + `${id}: compare(${sampler.compare}) result with depthRef(${fix5( + originalCall.depthRef! + )}): ${fix5(compareTexel.Depth!)}` + ); + } }); + lines.push(...colorLines); + lines.push(...compareLines); + if (!isNaN(levelWeight)) { + lines.push(`level weight: ${fix5(levelWeight)}`); + } idCount += orderedTexelIndices.length; } } @@ -2131,9 +3194,13 @@ function layoutTwoColumns(columnA: string[], columnB: string[]) { */ export function getDepthOrArrayLayersForViewDimension(viewDimension?: GPUTextureViewDimension) { switch (viewDimension) { + case '1d': + return 1; case undefined: case '2d': return 1; + case '2d-array': + return 4; case '3d': return 8; case 'cube': @@ -2161,9 +3228,12 @@ export function chooseTextureSize({ }) { const { blockWidth, blockHeight } = kTextureFormatInfo[format]; const width = align(Math.max(minSize, blockWidth * minBlocks), blockWidth); - const height = align(Math.max(minSize, blockHeight * minBlocks), blockHeight); + const height = + viewDimension === '1d' ? 1 : align(Math.max(minSize, blockHeight * minBlocks), blockHeight); if (viewDimension === 'cube' || viewDimension === 'cube-array') { - const size = lcm(width, height); + const blockLCM = lcm(blockWidth, blockHeight); + const largest = Math.max(width, height); + const size = align(largest, blockLCM); return [size, size, viewDimension === 'cube-array' ? 24 : 6]; } const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); @@ -2177,11 +3247,17 @@ export const kCubeSamplePointMethods = ['cube-edges', 'texel-centre', 'spiral'] export type CubeSamplePointMethods = (typeof kSamplePointMethods)[number]; type TextureBuiltinInputArgs = { + textureBuiltin?: TextureBuiltin; descriptor: GPUTextureDescriptor; sampler?: GPUSamplerDescriptor; + derivatives?: boolean; mipLevel?: RangeDef; sampleIndex?: RangeDef; arrayIndex?: RangeDef; + grad?: boolean; + bias?: boolean; + component?: boolean; + depthRef?: boolean; offset?: boolean; hashInputs: (number | string | boolean)[]; }; @@ -2201,7 +3277,19 @@ function generateTextureBuiltinInputsImpl( radius?: number; loops?: number; }) -): { coords: T; mipLevel: number; sampleIndex?: number; arrayIndex?: number; offset?: T }[] { +): { + coords: T; + derivativeMult?: T; + ddx?: T; + ddy?: T; + mipLevel: number; + sampleIndex?: number; + arrayIndex?: number; + bias?: number; + offset?: T; + component?: number; + depthRef?: number; +}[] { const { method, descriptor } = args; const dimension = descriptor.dimension ?? '2d'; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2233,15 +3321,27 @@ function generateTextureBuiltinInputsImpl( const _hashInputs = args.hashInputs.map(v => typeof v === 'string' ? 
sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v ); + + // returns a number between [0 and N) + const makeRandValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { + const range = num; + const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range; + return type === 'f32' ? number : Math.floor(number); + }; + + // for signed and float values returns [-1 to num] + // for unsigned values returns [0 to num] const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; - const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + + // Generates the same values per coord instead of using all the extra `_hashInputs`. + const makeIntHashValueRepeatable = (min: number, max: number, ...hashInputs: number[]) => { const range = max - min; - return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + return min + Math.floor((hashU32(...hashInputs) / 0x1_0000_0000) * range); }; // Samplers across devices use different methods to interpolate. @@ -2253,7 +3353,77 @@ function generateTextureBuiltinInputsImpl( // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + + // When filtering is nearest then we want to avoid edges of texels + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // Above, coordinate U could sample either A or B + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | C | | | | | + // +---+---+---+---+---+---+---+---+ + // + // For textureGather we want to avoid texel centers + // as for coordinate U could either gather A,B or B,C. + + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0; + + // textureGather issues for 2d/3d textures + // + // If addressModeU is repeat, then on an 8x1 texture, u = 0.01 or u = 0.99 + // would gather these texels + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // If addressModeU is clamp-to-edge or mirror-repeat, + // then on an 8x1 texture, u = 0.01 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // and 0.99 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // This means we have to if addressMode is not `repeat`, we + // need to avoid the edge of the texture. + // + // Note: we don't have these specific issues with cube maps + // as they ignore addressMode + const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m; + const addressMode: GPUAddressMode[] = + args.textureBuiltin === 'textureSampleBaseClampToEdge' + ? ['clamp-to-edge', 'clamp-to-edge', 'clamp-to-edge'] + : [ + args.sampler?.addressModeU ?? 'clamp-to-edge', + args.sampler?.addressModeV ?? 'clamp-to-edge', + args.sampler?.addressModeW ?? 
'clamp-to-edge', + ]; + const avoidTextureEdge = (axis: number, textureDimensionUnits: number, v: number) => { + assert(isBuiltinGather(args.textureBuiltin)); + if (addressMode[axis] === 'repeat') { + return v; + } + const inside = euclideanModulo(v, textureDimensionUnits); + const outside = v - inside; + return outside + clamp(inside, { min: 1, max: textureDimensionUnits - 1 }); + }; + + const numComponents = isDepthOrStencilTextureFormat(descriptor.format) ? 1 : 4; return coords.map((c, i) => { const mipLevel = args.mipLevel ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest') @@ -2265,27 +3435,115 @@ function generateTextureBuiltinInputsImpl( const coords = c.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isTexelEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isTexelEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + const v3 = isBuiltinGather(args.textureBuiltin) ? avoidTextureEdge(i, q[i], v2) : v2; // Convert back to texture coords - return v2 / q[i]; + return v3 / q[i]; }) as T; + const makeGradient = (hashInput: number): T => { + return coords.map((_, i) => { + // a value between -4 and 4 integer then add +/- 0.25 + // We want to be able to choose levels but we want to avoid the area where the + // gpu might choose 2 different levels than the software renderer. + const intPart = makeRangeValue({ num: 8, type: 'u32' }, i, hashInput) - 4; + const fractPart = makeRangeValue({ num: 0, type: 'f32' }, i, hashInput + 1) * 0.25; + assert(fractPart >= -0.25 && fractPart <= 0.25); + return intPart + fractPart; + }) as T; + }; + + // choose a derivative value that will select a mipLevel. + const makeDerivativeMult = (coords: T, mipLevel: number): T => { + // Make an identity vec (all 1s). + const mult = new Array(coords.length).fill(0); + // choose one axis to set + const ndx = makeRangeValue({ num: coords.length - 1, type: 'u32' }, i, 8); + assert(ndx < coords.length); + mult[ndx] = Math.pow(2, mipLevel); + return mult as T; + }; + + // Choose a mip level. If mipmapFilter is 'nearest' then avoid centers of levels + // else avoid edges. + const chooseMipLevel = () => { + const innerLevelR = makeRandValue({ num: 9, type: 'u32' }, i, 11); + const innerLevel = + args?.sampler?.mipmapFilter === 'linear' + ? innerLevelR + 1 + : innerLevelR < 5 + ? innerLevelR + : innerLevelR + 1; + const outerLevel = makeRangeValue({ num: mipLevelCount - 1, type: 'i32' }, i, 11); + return outerLevel + innerLevel / 10; + }; + + // for textureSample, choose a derivative value that will select a mipLevel near + // the range of mip levels. + const makeDerivativeMultForTextureSample = (coords: T): T => { + const mipLevel = chooseMipLevel(); + return makeDerivativeMult(coords, mipLevel); + }; + + // for textureSampleBias we choose a mipLevel we want to sample, then a bias between -17 and 17. + // and then a derivative that, given the chosen bias will arrive at the chosen mipLevel. + // The GPU is supposed to clamp between -16.0 and 15.99. 
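// (Worked example of the scheme described above; the numbers are illustrative,
//  not values the harness necessarily generates.)
//
// Say we want the call to land on mip level 2 and the hash picks bias = -3.5:
//
//   clampedBias               = clamp(-3.5, -16, 15.99)  = -3.5
//   derivativeBasedMipLevel   = 2 - (-3.5)               = 5.5
//   derivativeMult (one axis) = 2 ** 5.5                 ≈ 45.25
//
// The implicit derivative then selects level 5.5, the GPU adds the clamped
// bias of -3.5 back in, and the sample lands on the intended level 2.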
+ const makeBiasAndDerivativeMult = (coords: T): [number, T] => { + const mipLevel = chooseMipLevel(); + const bias = makeRangeValue({ num: 34, type: 'f32' }, i, 9) - 17; + const clampedBias = clamp(bias, { min: -16, max: 15.99 }); + const derivativeBasedMipLevel = mipLevel - clampedBias; + const derivativeMult = makeDerivativeMult(coords, derivativeBasedMipLevel); + return [bias, derivativeMult]; + }; + + // If bias is set this is textureSampleBias. If bias is not set but derivatives + // is then this is one of the other functions that needs implicit derivatives. + const [bias, derivativeMult] = args.bias + ? makeBiasAndDerivativeMult(coords) + : args.derivatives + ? [undefined, makeDerivativeMultForTextureSample(coords)] + : []; + return { coords, + derivativeMult, mipLevel, sampleIndex: args.sampleIndex ? makeRangeValue(args.sampleIndex, i, 1) : undefined, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + // use 0.0, 0.5, or 1.0 for depthRef. We can't test for equality except for values 0 and 1 + // The texture will be filled with random values unless our comparison is 'equal' or 'not-equal' + // in which case the texture will be filled with only 0, 0.6, 1. Choosing 0.0, 0.5, 1.0 here + // means we can test 'equal' and 'not-equal'. For other comparisons, the fact that the texture's + // contents is random seems enough to test all the comparison modes. + depthRef: args.depthRef ? makeRandValue({ num: 3, type: 'u32' }, i, 5) / 2 : undefined, + ddx: args.grad ? makeGradient(7) : undefined, + ddy: args.grad ? makeGradient(8) : undefined, + bias, offset: args.offset - ? (coords.map((_, j) => makeIntHashValue(-8, 8, i, 3 + j)) as T) + ? (coords.map((_, j) => makeIntHashValueRepeatable(-8, 8, i, 3 + j)) as T) : undefined, + component: args.component ? makeIntHashValueRepeatable(0, numComponents, i, 4) : undefined, }; }); } +/** + * When mipmapFilter === 'nearest' we need to stay away from 0.5 + * because the GPU could decide to choose one mip or the other. + * + * Some example transition values, the value at which the GPU chooses + * mip level 1 over mip level 0: + * + * M1 Mac: 0.515381 + * Intel Mac: 0.49999 + * AMD Mac: 0.5 + */ const kMipEpsilon = 0.02; -function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUFilterMode) { +function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUMipmapFilterMode) { if (mipmapFilter === 'linear') { return mipLevel; } @@ -2360,7 +3618,7 @@ function normalize(v: vec3): vec3 { /** * Converts a cube map coordinate to a uv coordinate (0 to 1) and layer (0.5/6.0 to 5.5/6.0). */ -export function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { +function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { let uvw; let layer; // normalize the coord. @@ -2389,141 +3647,41 @@ export function convertCubeCoordToNormalized3DTextureCoord(v: vec3): vec3 { /** * Convert a 3d texcoord into a cube map coordinate. */ -export function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) { +function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) { const [u, v, faceLayer] = uvLayer; return normalize(transformMat3([u, v, 1], kFaceUVMatrices[Math.min(5, faceLayer * 6) | 0])); } /** + * Wrap a texel based face coord across cube faces + * * We have a face texture in texels coord where U/V choose a texel and W chooses the face. * If U/V are outside the size of the texture then, when normalized and converted * to a cube map coordinate, they'll end up pointing to a different face. 
* * addressMode is effectively ignored for cube * - * +-----------+ - * |0->u | - * |↓ | - * |v +y | - * | (2) | - * | | - * +-----------+-----------+-----------+-----------+ - * |0->u |0->u |0->u |0->u | - * |↓ |↓ |↓ |↓ | - * |v -x |v +z |v +x |v -z | - * | (1) | (4) | (0) | (5) | - * | | | | | - * +-----------+-----------+-----------+-----------+ - * |0->u | - * |↓ | - * |v -y | - * | (3) | - * | | - * +-----------+ + * By converting from a texel based coord to a normalized coord and then to a cube map coord, + * if the texel was outside of the face, the cube map coord will end up pointing to a different + * face. We then convert back cube coord -> normalized face coord -> texel based coord */ -const kFaceConversions = { - u: (textureSize: number, faceCoord: vec3) => faceCoord[0], - v: (textureSize: number, faceCoord: vec3) => faceCoord[1], - 'u+t': (textureSize: number, faceCoord: vec3) => faceCoord[0] + textureSize, - 'u-t': (textureSize: number, faceCoord: vec3) => faceCoord[0] - textureSize, - 'v+t': (textureSize: number, faceCoord: vec3) => faceCoord[1] + textureSize, - 'v-t': (textureSize: number, faceCoord: vec3) => faceCoord[1] - textureSize, - 't-v': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1], - '1+u': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[0], - '1+v': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[1], - '-v-1': (textureSize: number, faceCoord: vec3) => -faceCoord[1] - 1, - 't-u-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[0] - 1, - 't-v-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1] - 1, - '2t-u-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[0] - 1, - '2t-v-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[1] - 1, -} as const; -const kFaceConversionEnums = keysOf(kFaceConversions); -type FaceCoordConversion = (typeof kFaceConversionEnums)[number]; - -// For Each face -// face to go if u < 0 -// face to go if u >= textureSize -// face to go if v < 0 -// face to go if v >= textureSize -const kFaceToFaceRemap: { to: number; u: FaceCoordConversion; v: FaceCoordConversion }[][] = [ - // 0 - [ - /* -u */ { to: 4, u: 'u+t', v: 'v' }, - /* +u */ { to: 5, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'v+t', v: 't-u-1' }, - /* +v */ { to: 3, u: '2t-v-1', v: 'u' }, - ], - // 1 - [ - /* -u */ { to: 5, u: 'u+t', v: 'v' }, - /* +u */ { to: 4, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: '-v-1', v: 'u' }, // -1->0, -2->1 -3->2 - /* +v */ { to: 3, u: 't-v', v: 't-u-1' }, - ], - // 2 - [ - /* -u */ { to: 1, u: 'v', v: '1+u' }, - /* +u */ { to: 0, u: 't-v-1', v: 'u-t' }, - /* -v */ { to: 5, u: 't-u-1', v: '-v-1' }, - /* +v */ { to: 4, u: 'u', v: 'v-t' }, - ], - // 3 - [ - /* -u */ { to: 1, u: 't-v-1', v: 'u+t' }, - /* +u */ { to: 0, u: 'v', v: '2t-u-1' }, - /* -v */ { to: 4, u: 'u', v: 'v+t' }, - /* +v */ { to: 5, u: 't-u-1', v: '2t-v-1' }, - ], - // 4 - [ - /* -u */ { to: 1, u: 'u+t', v: 'v' }, - /* +u */ { to: 0, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'u', v: 'v+t' }, - /* +v */ { to: 3, u: 'u', v: 'v-t' }, - ], - // 5 - [ - /* -u */ { to: 0, u: 'u+t', v: 'v' }, - /* +u */ { to: 1, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 't-u-1', v: '1+v' }, - /* +v */ { to: 3, u: 't-u-1', v: '2t-v-1' }, - ], -]; - -function getFaceWrapIndex(textureSize: number, faceCoord: vec3) { - if (faceCoord[0] < 0) { - return 0; - } - if (faceCoord[0] >= textureSize) { - return 1; - } - if (faceCoord[1] < 0) { - return 2; - } - if (faceCoord[1] >= textureSize) { - 
return 3; - } - return -1; -} - -function applyFaceWrap(textureSize: number, faceCoord: vec3): vec3 { - const ndx = getFaceWrapIndex(textureSize, faceCoord); - if (ndx < 0) { - return faceCoord; - } - const { to, u, v } = kFaceToFaceRemap[faceCoord[2]][ndx]; - return [ - kFaceConversions[u](textureSize, faceCoord), - kFaceConversions[v](textureSize, faceCoord), - to, +function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { + // convert texel based face coord to normalized 2d-array coord + const nc0: vec3 = [ + (faceCoord[0] + 0.5) / textureSize, + (faceCoord[1] + 0.5) / textureSize, + (faceCoord[2] + 0.5) / 6, + ]; + const cc = convertNormalized3DTexCoordToCubeCoord(nc0); + const nc1 = convertCubeCoordToNormalized3DTextureCoord(cc); + // convert normalized 2d-array coord back texel based face coord + const fc = [ + Math.floor(nc1[0] * textureSize), + Math.floor(nc1[1] * textureSize), + Math.floor(nc1[2] * 6), ]; -} -function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { - // If we're off both edges we need to wrap twice, once for each edge. - const faceCoord1 = applyFaceWrap(textureSize, faceCoord); - const faceCoord2 = applyFaceWrap(textureSize, faceCoord1); - return faceCoord2; + return fc; } function applyAddressModesToCoords( @@ -2567,9 +3725,15 @@ export function generateSamplePointsCube( }) ): { coords: vec3; + derivativeMult?: vec3; + ddx?: vec3; + ddy?: vec3; mipLevel: number; arrayIndex?: number; + bias?: number; offset?: undefined; + component?: number; + depthRef?: number; }[] { const { method, descriptor } = args; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2610,20 +3774,38 @@ export function generateSamplePointsCube( /* prettier-ignore */ coords.push( // between edges - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], - - [-1.01, 0, -1.02], - [ 1.01, 0, -1.02], - [-1.01, 0, 1.02], - [ 1.01, 0, 1.02], - - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], + // +x + [ 1 , -1.01, 0 ], // wrap -y + [ 1 , +1.01, 0 ], // wrap +y + [ 1 , 0 , -1.01 ], // wrap -z + [ 1 , 0 , +1.01 ], // wrap +z + // -x + [ -1 , -1.01, 0 ], // wrap -y + [ -1 , +1.01, 0 ], // wrap +y + [ -1 , 0 , -1.01 ], // wrap -z + [ -1 , 0 , +1.01 ], // wrap +z + + // +y + [ -1.01, 1 , 0 ], // wrap -x + [ +1.01, 1 , 0 ], // wrap +x + [ 0 , 1 , -1.01 ], // wrap -z + [ 0 , 1 , +1.01 ], // wrap +z + // -y + [ -1.01, -1 , 0 ], // wrap -x + [ +1.01, -1 , 0 ], // wrap +x + [ 0 , -1 , -1.01 ], // wrap -z + [ 0 , -1 , +1.01 ], // wrap +z + + // +z + [ -1.01, 0 , 1 ], // wrap -x + [ +1.01, 0 , 1 ], // wrap +x + [ 0 , -1.01, 1 ], // wrap -y + [ 0 , +1.01, 1 ], // wrap +y + // -z + [ -1.01, 0 , -1 ], // wrap -x + [ +1.01, 0 , -1 ], // wrap +x + [ 0 , -1.01, -1 ], // wrap -y + [ 0 , +1.01, -1 ], // wrap +y // corners (see comment "Issues with corners of cubemaps") // for why these are commented out. @@ -2643,13 +3825,28 @@ export function generateSamplePointsCube( const _hashInputs = args.hashInputs.map(v => typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v ); + + // returns a number between [0 and N) + const makeRandValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { + const range = num; + const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range; + return type === 'f32' ? 
number : Math.floor(number); + }; + + // for signed and float values returns [-1 to num] + // for unsigned values returns [0 to num] const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; + const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + const range = max - min; + return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + }; + // Samplers across devices use different methods to interpolate. // Quantizing the texture coordinates seems to hit coords that produce // comparable results to our computed results. @@ -2658,12 +3855,102 @@ export function generateSamplePointsCube( // Win 11, NVidia 2070 Super: 16 // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 + // + // Note: When doing `textureGather...` we can't use texel centers + // because which 4 pixels will be gathered jumps if we're slightly under + // or slightly over the center + // + // Similarly, if we're using 'nearest' filtering then we don't want texel + // edges for the same reason. + // + // Also note that for textureGather. The way it works for cube maps is to + // first convert from cube map coordinate to a 2D texture coordinate and + // a face. Then, choose 4 texels just like normal 2D texture coordinates. + // If one of the 4 texels is outside the current face, wrap it to the correct + // face. + // + // An issue this brings up though. Imagine a 2D texture with addressMode = 'repeat' + // + // 2d texture (same texture repeated to show 'repeat') + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ a│ │c │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ b│ │d │ │ │ + // └───┴───┴───┘ └───┴───┴───┘ + // + // Assume the texture coordinate is at the bottom right corner of a. + // Then textureGather will grab c, d, b, a (no idea why that order). + // but think of it as top-right, bottom-right, bottom-left, top-left. + // Similarly, if the texture coordinate is at the top left of d it + // will select the same 4 texels. + // + // But, in the case of a cubemap, each face is in different direction + // relative to the face next to it. + // + // +-----------+ + // |0->u | + // |↓ | + // |v +y | + // | (2) | + // | | + // +-----------+-----------+-----------+-----------+ + // |0->u |0->u |0->u |0->u | + // |↓ |↓ |↓ |↓ | + // |v -x |v +z |v +x |v -z | + // | (1) | (4) | (0) | (5) | + // | | | | | + // +-----------+-----------+-----------+-----------+ + // |0->u | + // |↓ | + // |v -y | + // | (3) | + // | | + // +-----------+ + // + // As an example, imagine going from the +y to the +x face. + // See diagram above, the right edge of the +y face wraps + // to the top edge of the +x face. + // + // +---+---+ + // | a|c | + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ b│d │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ a│ c | │ │ │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ b│ d | │ │ │ │ + // └───┴───┴───┘---+ └───┴───┴───┘ + // +y face +x face + // + // If the texture coordinate is in the bottom right corner of a, + // the rectangle of texels we read are a,b,c,d and, if we the + // texture coordinate is in the top left corner of d we also + // read a,b,c,d according to the 2 diagrams above. 
+ // + // But, notice that when reading from the POV of +y vs +x, + // which actual a,b,c,d texels are different. + // + // From the POV of face +x: a,b are in face +x and c,d are in face +y + // From the POV of face +y: a,c are in face +x and b,d are in face +y + // + // This is all the long way of saying that if we're on the edge of a cube + // face we could get drastically different results because the orientation + // of the rectangle of the 4 texels we use, rotates. So, we need to avoid + // any values too close to the edge just in case our math is different than + // the GPU's. + // const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0; return coords.map((c, i) => { - const mipLevel = args.mipLevel ? makeRangeValue(args.mipLevel, i) : 0; + const mipLevel = args.mipLevel + ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest') + : 0; const clampedMipLevel = clamp(mipLevel, { min: 0, max: mipLevelCount - 1 }); - const mipSize = virtualMipSize('2d', size, clampedMipLevel); + const mipSize = virtualMipSize('2d', size, Math.ceil(clampedMipLevel)); const q = [ mipSize[0] * kSubdivisionsPerTexel, mipSize[0] * kSubdivisionsPerTexel, @@ -2683,17 +3970,92 @@ export function generateSamplePointsCube( const quantizedUVW = uvw.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; - // Convert back to texture coords - return v2 / q[i]; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + // Convert back to texture coords slightly off + return (v2 + 1 / 16) / q[i]; }) as vec3; + + const quantize = (v: number, units: number) => Math.floor(v * units) * units; + + const makeGradient = (hashInput: number): T => { + return coords.map((_, i) => + // a value between -4 and 4, quantized to 1/3rd. + quantize(makeRangeValue({ num: 8, type: 'f32' }, i, hashInput) - 4, 1 / 3) + ) as T; + }; + const coords = convertNormalized3DTexCoordToCubeCoord(quantizedUVW); + + // choose a derivative value that will select a mipLevel. + const makeDerivativeMult = (coords: vec3, mipLevel: number): vec3 => { + // Make an identity vec (all 1s). + const mult = new Array(coords.length).fill(0); + // choose one axis to set + const ndx = makeRangeValue({ num: coords.length - 1, type: 'u32' }, i, 8); + assert(ndx < coords.length); + mult[ndx] = Math.pow(2, mipLevel); + return mult as vec3; + }; + + // Choose a mip level. If mipmapFilter is 'nearest' then avoid centers of levels + // else avoid edges. + const chooseMipLevel = () => { + const innerLevelR = makeRandValue({ num: 9, type: 'u32' }, i, 11); + const innerLevel = + args?.sampler?.mipmapFilter === 'linear' + ? innerLevelR + 1 + : innerLevelR < 4 + ? 
innerLevelR + : innerLevelR + 1; + const outerLevel = makeRangeValue({ num: mipLevelCount - 1, type: 'i32' }, i, 11); + return outerLevel + innerLevel / 10; + }; + + // for textureSample, choose a derivative value that will select a mipLevel near + // the range of mip levels. + const makeDerivativeMultForTextureSample = (coords: vec3): vec3 => { + const mipLevel = chooseMipLevel(); + return makeDerivativeMult(coords, mipLevel); + }; + + // for textureSampleBias we choose a mipLevel we want to sample, then a bias between -17 and 17. + // and then a derivative that, given the chosen bias will arrive at the chosen mipLevel. + // The GPU is supposed to clamp between -16.0 and 15.99. + const makeBiasAndDerivativeMult = (coords: vec3): [number, vec3] => { + const mipLevel = chooseMipLevel(); + const bias = makeRangeValue({ num: 34, type: 'f32' }, i, 9) - 17; + const clampedBias = clamp(bias, { min: -16, max: 15.99 }); + const derivativeBasedMipLevel = mipLevel - clampedBias; + const derivativeMult = makeDerivativeMult(coords, derivativeBasedMipLevel); + return [bias, derivativeMult]; + }; + + // If bias is set this is textureSampleBias. If bias is not set but derivatives + // is then this is one of the other functions that needs implicit derivatives. + const [bias, derivativeMult] = args.bias + ? makeBiasAndDerivativeMult(coords) + : args.derivatives + ? [undefined, makeDerivativeMultForTextureSample(coords)] + : []; + return { coords, + derivativeMult, + ddx: args.grad ? makeGradient(7) : undefined, + ddy: args.grad ? makeGradient(8) : undefined, mipLevel, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + bias, + // use 0.0, 0.5, or 1.0 for depthRef. We can't test for equality except for values 0 and 1 + // The texture will be filled with random values unless our comparison is 'equal' or 'not-equal' + // in which case the texture will be filled with only 0, 0.6, 1. Choosing 0.0, 0.5, 1.0 here + // means we can test 'equal' and 'not-equal'. For other comparisons, the fact that the texture's + // contents is random seems enough to test all the comparison modes. + depthRef: args.depthRef ? makeRandValue({ num: 3, type: 'u32' }, i, 5) / 2 : undefined, + component: args.component ? 
makeIntHashValue(0, 4, i, 4) : undefined, }; }); } @@ -2714,7 +4076,9 @@ function wgslTypeFor(data: number | Dimensionality, type: 'f' | 'i' | 'u'): stri return `${type}32`; } -function wgslExpr(data: number | vec1 | vec2 | vec3 | vec4): string { +function wgslExpr( + data: number | Readonly | Readonly | Readonly | Readonly +): string { if (Array.isArray(data)) { switch (data.length) { case 1: @@ -2751,8 +4115,8 @@ function binKey(call: TextureCall): string { for (const name of kTextureCallArgNames) { const value = call[name]; if (value !== undefined) { - if (name === 'offset') { - // offset must be a constant expression + if (name === 'offset' || name === 'component') { + // offset and component must be constant expressions keys.push(`${name}: ${wgslExpr(value)}`); } else { keys.push(`${name}: ${wgslTypeFor(value, call.coordType)}`); @@ -2763,12 +4127,19 @@ function binKey(call: TextureCall): string { } function buildBinnedCalls(calls: TextureCall[]) { - const args: string[] = ['T']; // All texture builtins take the texture as the first argument + const args: string[] = []; const fields: string[] = []; const data: number[] = []; - const prototype = calls[0]; - if (prototype.builtin.startsWith('textureSample')) { + + if (isBuiltinGather(prototype.builtin) && prototype['componentType']) { + args.push(`/* component */ ${wgslExpr(prototype['component']!)}`); + } + + // All texture builtins take a Texture + args.push('T'); + + if (builtinNeedsSampler(prototype.builtin)) { // textureSample*() builtins take a sampler as the second argument args.push('S'); } @@ -2778,6 +4149,8 @@ function buildBinnedCalls(calls: TextureCall[]) { if (value !== undefined) { if (name === 'offset') { args.push(`/* offset */ ${wgslExpr(value)}`); + } else if (name === 'component') { + // was handled above } else { const type = name === 'mipLevel' @@ -2786,8 +4159,18 @@ function buildBinnedCalls(calls: TextureCall[]) { ? prototype.arrayIndexType! : name === 'sampleIndex' ? prototype.sampleIndexType! + : name === 'bias' || name === 'depthRef' || name === 'ddx' || name === 'ddy' + ? 'f' : prototype.coordType; - args.push(`args.${name}`); + if (name !== 'derivativeMult') { + args.push( + `args.${name}${ + name === 'coords' && builtinNeedsDerivatives(prototype.builtin) + ? 
' + derivativeBase * args.derivativeMult' + : '' + }` + ); + } fields.push(`@align(16) ${name} : ${wgslTypeFor(value, type)}`); } } @@ -2800,7 +4183,7 @@ function buildBinnedCalls(calls: TextureCall[]) { (prototype[name] === undefined) === (value === undefined), 'texture calls are not binned correctly' ); - if (value !== undefined && name !== 'offset') { + if (value !== undefined && name !== 'offset' && name !== 'component') { const type = getCallArgType(call, name); const bitcastToU32 = kBitCastFunctions[type]; if (value instanceof Array) { @@ -2839,22 +4222,39 @@ function binCalls(calls: TextureCall[]): number[][] return bins; } -export function describeTextureCall(call: TextureCall): string { - const args: string[] = ['texture: T']; - if (call.builtin.startsWith('textureSample')) { +function describeTextureCall(call: TextureCall): string { + const args: string[] = []; + if (isBuiltinGather(call.builtin) && call.componentType) { + args.push(`component: ${wgslExprFor(call.component!, call.componentType)}`); + } + args.push('texture: T'); + if (builtinNeedsSampler(call.builtin)) { args.push('sampler: S'); } for (const name of kTextureCallArgNames) { const value = call[name]; - if (value !== undefined) { + if (value !== undefined && name !== 'component') { if (name === 'coords') { + const derivativeWGSL = builtinNeedsDerivatives(call.builtin) + ? ` + derivativeBase * derivativeMult(${ + call.derivativeMult ? wgslExprFor(call.derivativeMult, call.coordType) : '1' + })` + : ''; + args.push(`${name}: ${wgslExprFor(value, call.coordType)}${derivativeWGSL}`); + } else if (name === 'derivativeMult') { + // skip this - it's covered in 'coords' + } else if (name === 'ddx' || name === 'ddy') { args.push(`${name}: ${wgslExprFor(value, call.coordType)}`); } else if (name === 'mipLevel') { args.push(`${name}: ${wgslExprFor(value, call.levelType!)}`); } else if (name === 'arrayIndex') { args.push(`${name}: ${wgslExprFor(value, call.arrayIndexType!)}`); + } else if (name === 'bias') { + args.push(`${name}: ${wgslExprFor(value, 'f')}`); } else if (name === 'sampleIndex') { args.push(`${name}: ${wgslExprFor(value, call.sampleIndexType!)}`); + } else if (name === 'depthRef') { + args.push(`${name}: ${wgslExprFor(value, 'f')}`); } else { args.push(`${name}: ${wgslExpr(value)}`); } @@ -2863,27 +4263,95 @@ export function describeTextureCall(call: TextureCall< return `${call.builtin}(${args.join(', ')})`; } -const s_deviceToPipelines = new WeakMap>(); +const s_deviceToPipelines = new WeakMap< + GPUDevice, + Map +>(); /** * Given a list of "calls", each one of which has a texture coordinate, - * generates a fragment shader that uses the fragment position as an index - * (position.y * 256 + position.x) That index is then used to look up a - * coordinate from a storage buffer which is used to call the WGSL texture - * function to read/sample the texture, and then write to an rgba32float - * texture. We then read the rgba32float texture for the per "call" results. + * generates a fragment shader that uses the instance_index as an index. That + * index is then used to look up a coordinate from a storage buffer which is + * used to call the WGSL texture function to read/sample the texture, and then + * write to a storage buffer. We then read the storage buffer for the per "call" + * results. + * + * We use a 1x1 target and use instance drawing, once instance per call. + * This allows use to more easily adjust derivatives per call. 
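+ *
+ * As a sketch (this mirrors the render-pass loop in `run` further below; it is
+ * illustrative only, not additional behavior):
+ *
+ *   for (let i = 0; i < calls.length; ++i) {
+ *     pass.setViewport(i, 0, 1, 1, 0, 1); // one 1x1 viewport (pixel) per call
+ *     pass.draw(3, 1, 0, i);              // firstInstance = i, so instance_index selects the call
+ *   }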
+ * + * An issue we ran into before this "one draw call per instance" change; + * Before we had a single draw call and wrote the result of one call per + * pixel rendered. + * + * Imagine we have code like this: + * + * ``` + * @group(0) @binding(0) var T: texture_2d; + * @group(0) @binding(1) var S: sampler; + * @group(0) @binding(2) var coords: array; + * @fragment fn fs(@builtin(position) pos: vec4f) -> vec4f { + * let ndx = u32(pos.x) * u32(pos.y) * targetWidth; + * return textureSample(T, S, coords[ndx].xy); + * } + * ``` + * + * T points to 8x8 pixel texture with 3 mip levels + * S is 'nearest' + * coords: is a storage buffer, 16 bytes long [0,0,0,0], one vec4f. + * our render target is 1x1 pixels + * + * Looking above it appears `ndx` will only ever be 0 but that's + * not what happens. Instead, the GPU will run the fragment shader for + * a 2x2 area. It does this to compute derivatives by running the code + * above and looking at what values it gets passed as coords to + * textureSample. When it does this it ends up with + * + * ndx = 0 for invocation 0 + * ndx = 1 for invocation 1 + * ndx = 0 + 1 * targetWidth for invocation 2 + * ndx = 1 + 1 * targetWidth for invocation 3 + * + * In 3 of those cases `ndx` is out of bounds with respect to `coords`. + * Out of bounds access is indeterminate. That means the derivatives are + * indeterminate so what lod it tries to read is indeterminate. + * + * By using instance_index for ndx we avoid this issue. ndx is the same + * on all 4 executions. * * Calls are "binned" by call parameters. Each bin has its own structure and * field in the storage buffer. This allows the calls to be non-homogenous and * each have their own data type for coordinates. + * + * Note: this function returns: + * + * 'results': an array of results, one for each call. + * + * 'run': a function that accepts a texture and runs the same class pipeline with + * that texture as input, returning an array of results. This can be used by + * identifySamplePoints to query the mix-weights used. We do this so we're + * using the same shader that generated the original results when querying + * the weights. + * + * 'destroy': a function that cleans up the buffers used by `run`. 
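+ *
+ * Example usage (a sketch of how `doTextureCalls` below wires this up; `texInfo`
+ * here is a stand-in for the {format, dimension, sampleCount, depthOrArrayLayers}
+ * argument):
+ *
+ *   const runner = createTextureCallsRunner(t, texInfo, viewDescriptor, textureType, sampler, calls, stage);
+ *   const results = await runner.run(gpuTexture);
+ *   // ...optionally call runner.run() again with another texture to query mix-weights...
+ *   runner.destroy();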
*/ -export async function doTextureCalls( +function createTextureCallsRunner( t: GPUTest, - gpuTexture: GPUTexture | GPUExternalTexture, + { + format, + dimension, + sampleCount, + depthOrArrayLayers, + }: { + format: GPUTextureFormat; + dimension: GPUTextureDimension; + sampleCount: number; + depthOrArrayLayers: number; + }, viewDescriptor: GPUTextureViewDescriptor, textureType: string, sampler: GPUSamplerDescriptor | undefined, - calls: TextureCall[] + calls: TextureCall[], + stage: ShaderStage ) { let structs = ''; let body = ''; @@ -2894,15 +4362,15 @@ export async function doTextureCalls( binned.forEach((binCalls, binIdx) => { const b = buildBinnedCalls(binCalls.map(callIdx => calls[callIdx])); structs += `struct Args${binIdx} { - ${b.fields.join(', \n')} + ${b.fields.join(',\n ')} } `; dataFields += ` args${binIdx} : array, `; body += ` { - let is_active = (frag_idx >= ${callCount}) & (frag_idx < ${callCount + binCalls.length}); - let args = data.args${binIdx}[frag_idx - ${callCount}]; + let is_active = (idx >= ${callCount}) & (idx < ${callCount + binCalls.length}); + let args = data.args${binIdx}[idx - ${callCount}]; let call = ${b.expr}; result = select(result, call, is_active); } @@ -2913,25 +4381,93 @@ export async function doTextureCalls( const dataBuffer = t.createBufferTracked({ size: data.length * 4, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM, }); t.device.queue.writeBuffer(dataBuffer, 0, new Uint32Array(data)); - const { resultType, resultFormat, componentType } = - gpuTexture instanceof GPUExternalTexture - ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : textureType.includes('depth') - ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : getTextureFormatTypeInfo(gpuTexture.format); + const builtin = calls[0].builtin; + const isCompare = isBuiltinComparison(builtin); + + const { resultType, resultFormat, componentType } = isBuiltinGather(builtin) + ? getTextureFormatTypeInfo(format) + : textureType === 'texture_external' + ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : textureType.includes('depth') + ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : getTextureFormatTypeInfo(format); const returnType = `vec4<${componentType}>`; - const rtWidth = 256; + const samplerType = isCompare ? 'sampler_comparison' : 'sampler'; + const renderTarget = t.createTextureTracked({ format: resultFormat, - size: { width: rtWidth, height: Math.ceil(calls.length / rtWidth) }, + size: [calls.length, 1], usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.RENDER_ATTACHMENT, }); + // derivativeBase is a number that starts at (0, 0, 0) and advances by 1 in x, y + // for each fragment shader iteration in texel space. It is then converted to normalized + // texture space by dividing by the textureDimensions. + // Since it's moving by 1 texel unit we can multiply it to get any specific lod value we want. + // Because it starts at (0, 0, 0) it will not affect our texture coordinate. + const derivativeBaseWGSL = ` + let derivativeBase = ${ + isCubeViewDimension(viewDescriptor) + ? '(v.pos.xyx - 0.5 - vec3f(f32(v.ndx), 0, f32(v.ndx))) / vec3f(vec2f(textureDimensions(T)), 1.0)' + : dimension === '1d' + ? 'f32(v.pos.x - 0.5 - f32(v.ndx)) / f32(textureDimensions(T))' + : dimension === '3d' + ? 
'vec3f(v.pos.xy - 0.5 - vec2f(f32(v.ndx), 0), 0) / vec3f(textureDimensions(T))' + : '(v.pos.xy - 0.5 - vec2f(f32(v.ndx), 0)) / vec2f(textureDimensions(T))' + };`; + const derivativeType = + isCubeViewDimension(viewDescriptor) || dimension === '3d' + ? 'vec3f' + : dimension === '1d' + ? 'f32' + : 'vec2f'; + + const stageWGSL = + stage === 'vertex' + ? ` +// --------------------------- vertex stage shaders -------------------------------- +@vertex fn vsVertex( + @builtin(vertex_index) vertex_index : u32, + @builtin(instance_index) instance_index : u32) -> VOut { + let positions = array(vec2f(-1, 3), vec2f(3, -1), vec2f(-1, -1)); + return VOut(vec4f(positions[vertex_index], 0, 1), + instance_index, + getResult(instance_index, ${derivativeType}(0))); +} + +@fragment fn fsVertex(v: VOut) -> @location(0) ${returnType} { + return v.result; +} +` + : stage === 'fragment' + ? ` +// --------------------------- fragment stage shaders -------------------------------- +@vertex fn vsFragment( + @builtin(vertex_index) vertex_index : u32, + @builtin(instance_index) instance_index : u32) -> VOut { + let positions = array(vec2f(-1, 3), vec2f(3, -1), vec2f(-1, -1)); + return VOut(vec4f(positions[vertex_index], 0, 1), instance_index, ${returnType}(0)); +} + +@fragment fn fsFragment(v: VOut) -> @location(0) ${returnType} { + ${derivativeBaseWGSL} + return getResult(v.ndx, derivativeBase); +} +` + : ` +// --------------------------- compute stage shaders -------------------------------- +@group(1) @binding(0) var results: array<${returnType}>; + +@compute @workgroup_size(1) fn csCompute(@builtin(global_invocation_id) id: vec3u) { + results[id.x] = getResult(id.x, ${derivativeType}(0)); +} +`; + const code = ` ${structs} @@ -2939,120 +4475,301 @@ struct Data { ${dataFields} } -@vertex -fn vs_main(@builtin(vertex_index) vertex_index : u32) -> @builtin(position) vec4f { - let positions = array( - vec4f(-1, 1, 0, 1), vec4f( 1, 1, 0, 1), - vec4f(-1, -1, 0, 1), vec4f( 1, -1, 0, 1), - ); - return positions[vertex_index]; -} +struct VOut { + @builtin(position) pos: vec4f, + @location(0) @interpolate(flat, either) ndx: u32, + @location(1) @interpolate(flat, either) result: ${returnType}, +}; @group(0) @binding(0) var T : ${textureType}; -${sampler ? '@group(0) @binding(1) var S : sampler' : ''}; -@group(0) @binding(2) var data : Data; +${sampler ? `@group(0) @binding(1) var S : ${samplerType}` : ''}; +@group(0) @binding(2) var data : Data; -@fragment -fn fs_main(@builtin(position) frag_pos : vec4f) -> @location(0) ${returnType} { - let frag_idx = u32(frag_pos.x) + u32(frag_pos.y) * ${renderTarget.width}; +fn getResult(idx: u32, derivativeBase: ${derivativeType}) -> ${returnType} { var result : ${resultType}; ${body} return ${returnType}(result); } + +${stageWGSL} `; - const pipelines = s_deviceToPipelines.get(t.device) ?? new Map(); + const pipelines = + s_deviceToPipelines.get(t.device) ?? new Map(); s_deviceToPipelines.set(t.device, pipelines); - const id = `${renderTarget.format}:${code}`; + // unfilterable-float textures can only be used with manually created bindGroupLayouts + // since the default 'auto' layout requires filterable textures/samplers. + // So, if we don't need filtering, don't request a filtering sampler. If we require + // filtering then check if the format is 32float format and if float32-filterable + // is enabled. + const info = kTextureFormatInfo[format ?? 
'rgba8unorm']; + const isFiltering = + !!sampler && + (sampler.minFilter === 'linear' || + sampler.magFilter === 'linear' || + sampler.mipmapFilter === 'linear'); + let sampleType: GPUTextureSampleType = textureType.startsWith('texture_depth') + ? 'depth' + : isDepthTextureFormat(format) + ? 'unfilterable-float' + : isStencilTextureFormat(format) + ? 'uint' + : info.color?.type ?? 'float'; + if (isFiltering && sampleType === 'unfilterable-float') { + assert(is32Float(format)); + assert(t.device.features.has('float32-filterable')); + sampleType = 'float'; + } + if (sampleCount > 1 && sampleType === 'float') { + sampleType = 'unfilterable-float'; + } + + const visibility = + stage === 'compute' + ? GPUShaderStage.COMPUTE + : stage === 'fragment' + ? GPUShaderStage.FRAGMENT + : GPUShaderStage.VERTEX; + + const entries: GPUBindGroupLayoutEntry[] = [ + { + binding: 2, + visibility, + buffer: { + type: 'uniform', + }, + }, + ]; + + const viewDimension = effectiveViewDimensionForDimension( + viewDescriptor.dimension, + dimension, + depthOrArrayLayers + ); + + if (textureType.includes('storage')) { + entries.push({ + binding: 0, + visibility, + storageTexture: { + access: 'read-only', + viewDimension, + format, + }, + }); + } else if (textureType === 'texture_external') { + entries.push({ + binding: 0, + visibility, + externalTexture: {}, + }); + } else { + entries.push({ + binding: 0, + visibility, + texture: { + sampleType, + viewDimension, + multisampled: sampleCount > 1, + }, + }); + } + + if (sampler) { + entries.push({ + binding: 1, + visibility, + sampler: { + type: isCompare ? 'comparison' : isFiltering ? 'filtering' : 'non-filtering', + }, + }); + } + + const id = `${resultType}:${stage}:${JSON.stringify(entries)}:${code}`; let pipeline = pipelines.get(id); if (!pipeline) { - const shaderModule = t.device.createShaderModule({ code }); + const module = t.device.createShaderModule({ code }); + const bindGroupLayout0 = t.device.createBindGroupLayout({ entries }); + const bindGroupLayouts = [bindGroupLayout0]; + + if (stage === 'compute') { + const bindGroupLayout1 = t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.COMPUTE, + buffer: { + type: 'storage', + }, + }, + ], + }); + bindGroupLayouts.push(bindGroupLayout1); + } - pipeline = await t.device.createRenderPipelineAsync({ - layout: 'auto', - vertex: { module: shaderModule }, - fragment: { - module: shaderModule, - targets: [{ format: renderTarget.format }], - }, - primitive: { topology: 'triangle-strip' }, + const layout = t.device.createPipelineLayout({ + bindGroupLayouts, }); + switch (stage) { + case 'compute': + pipeline = t.device.createComputePipeline({ + layout, + compute: { module }, + }); + break; + case 'fragment': + case 'vertex': + pipeline = t.device.createRenderPipeline({ + layout, + vertex: { module }, + fragment: { + module, + targets: [{ format: renderTarget.format }], + }, + }); + break; + } pipelines.set(id, pipeline); } const gpuSampler = sampler ? t.device.createSampler(sampler) : undefined; - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: - gpuTexture instanceof GPUExternalTexture - ? gpuTexture - : gpuTexture.createView(viewDescriptor), - }, - ...(sampler ? [{ binding: 1, resource: gpuSampler! 
}] : []), - { binding: 2, resource: { buffer: dataBuffer } }, - ], - }); + const run = async (gpuTexture: GPUTexture | GPUExternalTexture) => { + const resultBuffer = t.createBufferTracked({ + size: align(calls.length * 16, 256), + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); - const bytesPerRow = align(16 * renderTarget.width, 256); - const resultBuffer = t.createBufferTracked({ - size: renderTarget.height * bytesPerRow, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); - const encoder = t.device.createCommandEncoder(); + const bindGroup0 = t.device.createBindGroup({ + layout: pipeline!.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: + gpuTexture instanceof GPUExternalTexture + ? gpuTexture + : gpuTexture.createView(viewDescriptor), + }, + ...(sampler ? [{ binding: 1, resource: gpuSampler! }] : []), + { binding: 2, resource: { buffer: dataBuffer } }, + ], + }); - const renderPass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: renderTarget.createView(), - loadOp: 'clear', - storeOp: 'store', - }, - ], - }); + let storageBuffer: GPUBuffer | undefined; + const encoder = t.device.createCommandEncoder(); - renderPass.setPipeline(pipeline); - renderPass.setBindGroup(0, bindGroup); - renderPass.draw(4); - renderPass.end(); - encoder.copyTextureToBuffer( - { texture: renderTarget }, - { buffer: resultBuffer, bytesPerRow }, - { width: renderTarget.width, height: renderTarget.height } - ); - t.device.queue.submit([encoder.finish()]); + if (stage === 'compute') { + storageBuffer = t.createBufferTracked({ + size: resultBuffer.size, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); - await resultBuffer.mapAsync(GPUMapMode.READ); + const bindGroup1 = t.device.createBindGroup({ + layout: pipeline!.getBindGroupLayout(1), + entries: [{ binding: 0, resource: { buffer: storageBuffer } }], + }); - const view = TexelView.fromTextureDataByReference( - renderTarget.format as EncodableTextureFormat, - new Uint8Array(resultBuffer.getMappedRange()), - { - bytesPerRow, - rowsPerImage: renderTarget.height, - subrectOrigin: [0, 0, 0], - subrectSize: [renderTarget.width, renderTarget.height], + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline! as GPUComputePipeline); + pass.setBindGroup(0, bindGroup0); + pass.setBindGroup(1, bindGroup1); + pass.dispatchWorkgroups(calls.length); + pass.end(); + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, storageBuffer.size); + } else { + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + + pass.setPipeline(pipeline! 
as GPURenderPipeline); + pass.setBindGroup(0, bindGroup0); + for (let i = 0; i < calls.length; ++i) { + pass.setViewport(i, 0, 1, 1, 0, 1); + pass.draw(3, 1, 0, i); + } + pass.end(); + encoder.copyTextureToBuffer( + { texture: renderTarget }, + { + buffer: resultBuffer, + bytesPerRow: resultBuffer.size, + }, + [renderTarget.width, 1] + ); } - ); + t.device.queue.submit([encoder.finish()]); + + await resultBuffer.mapAsync(GPUMapMode.READ); + + const view = TexelView.fromTextureDataByReference( + resultFormat, + new Uint8Array(resultBuffer.getMappedRange()), + { + bytesPerRow: calls.length * 16, + rowsPerImage: 1, + subrectOrigin: [0, 0, 0], + subrectSize: [calls.length, 1], + } + ); - let outIdx = 0; - const out = new Array>(calls.length); - for (const bin of binned) { - for (const callIdx of bin) { - const x = outIdx % rtWidth; - const y = Math.floor(outIdx / rtWidth); - out[callIdx] = view.color({ x, y, z: 0 }); - outIdx++; + let outIdx = 0; + const out = new Array>(calls.length); + for (const bin of binned) { + for (const callIdx of bin) { + const x = outIdx; + out[callIdx] = view.color({ x, y: 0, z: 0 }); + outIdx++; + } } - } - renderTarget.destroy(); - resultBuffer.destroy(); + storageBuffer?.destroy(); + resultBuffer.destroy(); - return out; + return out; + }; + + return { + run, + destroy() { + dataBuffer.destroy(); + renderTarget.destroy(); + }, + }; +} + +export async function doTextureCalls( + t: GPUTest, + gpuTexture: GPUTexture | GPUExternalTexture, + viewDescriptor: GPUTextureViewDescriptor, + textureType: string, + sampler: GPUSamplerDescriptor | undefined, + calls: TextureCall[], + shortShaderStage: ShortShaderStage +) { + const stage = kShortShaderStageToShaderStage[shortShaderStage]; + const runner = createTextureCallsRunner( + t, + gpuTexture instanceof GPUExternalTexture + ? { format: 'rgba8unorm', dimension: '2d', depthOrArrayLayers: 1, sampleCount: 1 } + : gpuTexture, + viewDescriptor, + textureType, + sampler, + calls, + stage + ); + const results = await runner.run(gpuTexture); + + return { + runner, + results, + }; } diff --git a/src/webgpu/shader/execution/padding.spec.ts b/src/webgpu/shader/execution/padding.spec.ts index 3a3671bcc3ff..c9e230013590 100644 --- a/src/webgpu/shader/execution/padding.spec.ts +++ b/src/webgpu/shader/execution/padding.spec.ts @@ -263,6 +263,87 @@ g.test('array_of_vec3') ); }); +g.test('array_of_vec3h') + .desc( + `Test that padding bytes in between array elements are preserved when f16 elements are used. + + This test defines creates a read-write storage buffer with type array. The shader + assigns the whole variable at once, and we then test that data in the padding bytes was + preserved. + ` + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('shader-f16'); + }) + .fn(t => { + const wgsl = ` + enable f16; + @group(0) @binding(0) var buffer : array, 4>; + + @compute @workgroup_size(1) + fn main() { + buffer = array, 4>( + vec3(1h), + vec3(2h), + vec3(3h), + vec3(4h), + ); + } + `; + runShaderTest( + t, + wgsl, + new Uint32Array([ + // buffer[0] + 0x3c003c00, 0xdead3c00, + // buffer[1] + 0x40004000, 0xdead4000, + // buffer[2] + 0x42004200, 0xdead4200, + // buffer[2] + 0x44004400, 0xdead4400, + ]) + ); + }); + +g.test('array_of_vec3h,elementwise') + .desc( + `Test that padding bytes in between array elements are preserved when f16 elements are used. + + This test defines creates a read-write storage buffer with type array. 
The shader + assigns one element per thread, and we then test that data in the padding bytes was + preserved. + ` + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('shader-f16'); + }) + .fn(t => { + const wgsl = ` + enable f16; + @group(0) @binding(0) var buffer : array>; + + @compute @workgroup_size(4) + fn main(@builtin(local_invocation_index) lid : u32) { + buffer[lid] = vec3h(f16(lid + 1)); + } + `; + runShaderTest( + t, + wgsl, + new Uint32Array([ + // buffer[0] + 0x3c003c00, 0xdead3c00, + // buffer[1] + 0x40004000, 0xdead4000, + // buffer[2] + 0x42004200, 0xdead4200, + // buffer[2] + 0x44004400, 0xdead4400, + ]) + ); + }); + g.test('array_of_struct') .desc( `Test that padding bytes in between array elements are preserved. diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index ffd58976fc88..7a6aa8901e28 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -20,14 +20,17 @@ is evaluated per-fragment or per-sample. With @interpolate(, sample) or usage of import { makeTestGroup } from '../../../../common/framework/test_group.js'; import { ErrorWithExtra, assert, range, unreachable } from '../../../../common/util/util.js'; import { InterpolationSampling, InterpolationType } from '../../../constants.js'; -import { GPUTest } from '../../../gpu_test.js'; +import { kTextureFormatInfo } from '../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; import { getProvokingVertexForFlatInterpolationEitherSampling } from '../../../inter_stage.js'; import { getMultisampleFragmentOffsets } from '../../../multisample_info.js'; -import { dotProduct, subtractVectors } from '../../../util/math.js'; +import { dotProduct, subtractVectors, align } from '../../../util/math.js'; import { TexelView } from '../../../util/texture/texel_view.js'; import { findFailedPixels } from '../../../util/texture/texture_ok.js'; -export const g = makeTestGroup(GPUTest); +class FragmentBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(FragmentBuiltinTest); const s_deviceToPipelineMap = new WeakMap< GPUDevice, @@ -589,7 +592,7 @@ async function renderFragmentShaderInputsTo4TexturesAndReadbackValues( struct FragmentIn { @builtin(position) position: vec4f, - @location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, +@location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, ${fragInCode} }; @@ -1424,6 +1427,385 @@ g.test('inputs,sample_mask') ); }); -g.test('subgroup_size').unimplemented(); +const kSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 2], + [2, 111], + [35, 2], + [2, 35], + [53, 13], + [13, 53], +] as const; + +/** + * @returns The population count of input. + * + * @param input Treated as an unsigned 32-bit integer + */ +function popcount(input: number): number { + let n = input; + n = n - ((n >> 1) & 0x55555555); + n = (n & 0x33333333) + ((n >> 2) & 0x33333333); + return (((n + (n >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24; +} + +/** + * Checks subgroup_size builtin value consistency. + * + * The builtin subgroup_size is not assumed to be uniform in fragment shaders. + * Therefore, this function checks the value is a power of two within the device + * limits and that the ballot size is less than the stated size. 
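+ *
+ * For example (illustrative numbers only): a texel reporting a builtin size of 16
+ * passes only if popcount(16) === 1, min <= 16 <= max, and the ballot of active
+ * invocations counts at most 16 invocations; otherwise an Error is returned.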
+ * @param data An array of vec4u that contains (per texel): + * * builtin value + * * ballot size + * * comparison to other invocations + * * 0 + * @param format The texture format for data + * @param min The minimum subgroup size from the device + * @param max The maximum subgroup size from the device + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupSizeConsistency( + data: Uint32Array, + format: GPUTextureFormat, + min: number, + max: number, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // Image copies require bytesPerRow to be a multiple of 256. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const builtinSize = data[offset]; + const ballotSize = data[offset + 1]; + const comparison = data[offset + 2]; + if (builtinSize === 0) { + continue; + } + + if (popcount(builtinSize) !== 1) { + return new Error(`Subgroup size '${builtinSize}' is not a power of two`); + } + + if (builtinSize < min) { + return new Error(`Subgroup size '${builtinSize}' is less than minimum '${min}'`); + } + if (max < builtinSize) { + return new Error(`Subgroup size '${builtinSize}' is greater than maximum '${max}'`); + } + + if (builtinSize < ballotSize) { + return new Error(`Inconsistent subgroup ballot size +- icoord: (${row}, ${col}) +- expected: ${builtinSize} +- got: ${ballotSize}`); + } + + if (comparison !== 1) { + return new Error(`Not all invocations in subgroup have same view of the size +- icoord: (${row}, ${col})`); + } + } + } + + return undefined; +} + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen in 2 separate draw calls (half screen each). + * Results are checked for each draw. 
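+ *
+ * Example call (a sketch; the subgroup_size test below does essentially this,
+ * where `minSize`/`maxSize` stand for the device's subgroup size limits):
+ *
+ *   await runSubgroupTest(t, 'rgba32uint', fsShader, width, height,
+ *     data => checkSubgroupSizeConsistency(data, 'rgba32uint', minSize, maxSize, width, height));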
+ * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is a u32 sized data + * @param width The framebuffer width + * @param height The framebuffer height + * @param checker A functor to check the framebuffer values + */ +async function runSubgroupTest( + t: FragmentBuiltinTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-1, -1), vec2(-1, 1), vec2( 1, 1), + vec2(-1, -1), vec2( 1, -1), vec2( 1, 1), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + new Uint32Array([1]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); -g.test('subgroup_invocation_id').unimplemented(); + for (let i = 0; i < 2; i++) { + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3, 1, i); + pass.end(); + t.queue.submit([encoder.finish()]); + + const buffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(buffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); + } +} + +g.test('subgroup_size') + .desc('Tests subgroup_size values') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; + } + const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var for_layout : u32; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_size) sg_size : u32, +) -> 
@location(0) vec4u { + _ = for_layout; + + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Do all invocations in the subgroup see the same subgroup size? + let firstSize = subgroupBroadcast(sg_size, 0); + let compareBallot = countOneBits(subgroupBallot(firstSize == sg_size)); + let compareSize = compareBallot.x + compareBallot.y + compareBallot.z + compareBallot.w; + let sameSize = select(0u, 1u, compareSize == ballotSize); + + return vec4u(sg_size, ballotSize, sameSize, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupSizeConsistency( + data, + t.params.format, + minSubgroupSize, + maxSubgroupSize, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +/** + * Checks subgroup_invocation_id value consistency + * + * Very little uniformity is expected for subgroup_invocation_id. + * This function checks that all ids are less than the subgroup size + * and no id is repeated. + * @param data An array of vec4u that contains (per texel): + * * subgroup_invocation_id + * * ballot size + * * non-zero ID unique to each subgroup + * * 0 + * @param format The texture format of data + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupInvocationIdConsistency( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const mappings = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const id = data[offset]; + const size = data[offset + 1]; + const repId = data[offset + 2]; + + if (repId === 0) { + continue; + } + + if (size < id) { + return new Error( + `Invocation id '${id}' is greater than subgroup size '${size}' for (${row}, ${col})` + ); + } + + let v = mappings.get(repId) ?? 0n; + const mask = 1n << BigInt(id); + if ((mask & v) !== 0n) { + return new Error(`Multiple invocations with id '${id}' in subgroup '${repId}'`); + } + v |= mask; + mappings.set(repId, v); + } + } + + return undefined; +} + +g.test('subgroup_invocation_id') + .desc('Tests subgroup_invocation_id built-in value') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var counter : atomic; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) sg_size : u32, +) -> @location(0) vec4u { + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Generate representative id for this subgroup. 
+ var repId = atomicAdd(&counter, 1); + repId = subgroupBroadcast(repId, 0); + + return vec4u(id, ballotSize, repId, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupInvocationIdConsistency( + data, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts new file mode 100644 index 000000000000..baf5c98326a8 --- /dev/null +++ b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts @@ -0,0 +1,150 @@ +export const description = `Test vertex shader builtin variables + +* test builtin(clip_distances) +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; + +class VertexBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(VertexBuiltinTest); + +g.test('outputs,clip_distances') + .desc( + ` + Test vertex shader builtin(clip_distances) values. + + In the tests, we draw a square with two triangles (top-right and bottom left), whose vertices + have different clip distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + 1. The clip distances values of the pixels in the top-left region should be less than 0 so these + pixels will all be invisible + 2. The clip distances values of the pixels on the top-right-to-bottom-left diagonal line should + be equal to 0 + 3. The clip distances values of the pixels in the bottom-right region should be greater than 0 + + -1 - - - - - 0 + | \\ x x + | \\ x x x + | \\ x x x + | x x\\ x x + | x x x x\\ x + 0 x x x x x 1 + ` + ) + .params(u => u.combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(t => { + const { clipDistances } = t.params; + + // Draw two triangles (top-right and bottom left) into Red, whose vertices have different clip + // distances values. 
(Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + const code = ` + enable clip_distances; + const kClipDistancesSize = ${clipDistances}; + struct VertexOutputs { + @builtin(position) position : vec4f, + @builtin(clip_distances) clipDistances : array, + } + @vertex + fn vsMain(@builtin(vertex_index) vertexIndex : u32) -> VertexOutputs { + var posAndClipDistances = array( + vec3f(-1.0, 1.0, -1.0), + vec3f( 1.0, -1.0, 1.0), + vec3f( 1.0, 1.0, 0.0), + vec3f(-1.0, -1.0, 0.0), + vec3f( 1.0, -1.0, 1.0), + vec3f(-1.0, 1.0, -1.0)); + var vertexOutput : VertexOutputs; + vertexOutput.position = vec4f(posAndClipDistances[vertexIndex].xy, 0.0, 1.0); + vertexOutput.clipDistances[kClipDistancesSize - 1] = posAndClipDistances[vertexIndex].z; + return vertexOutput; + } + @fragment + fn fsMain() -> @location(0) vec4f { + return vec4f(1.0, 0.0, 0.0, 1.0); + }`; + const module = t.device.createShaderModule({ code }); + const renderPipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }); + + const kSize = 7; + const outputTexture = t.createTextureTracked({ + format: 'rgba8unorm', + size: [kSize, kSize, 1] as const, + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + + // Clear outputTexture to Green + const commandEncoder = t.device.createCommandEncoder(); + const renderPassEncoder = commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: outputTexture.createView(), + loadOp: 'clear', + clearValue: { r: 0.0, g: 1.0, b: 0.0, a: 1.0 }, + storeOp: 'store', + }, + ], + }); + renderPassEncoder.setPipeline(renderPipeline); + renderPassEncoder.draw(6); + renderPassEncoder.end(); + + const kBytesPerRow = 256; + const kBytesPerPixel = 4; + const outputDataSize = kBytesPerRow * (kSize - 1) + kSize * kBytesPerPixel; + const outputBuffer = t.createBufferTracked({ + size: outputDataSize, + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, + }); + + commandEncoder.copyTextureToBuffer( + { + texture: outputTexture, + }, + { + buffer: outputBuffer, + bytesPerRow: kBytesPerRow, + rowsPerImage: kSize, + }, + [kSize, kSize, 1] + ); + t.queue.submit([commandEncoder.finish()]); + + // The top-left part should be Green and the bottom-right part should be Red + const expectedData = new Uint8Array(outputDataSize); + for (let y = 0; y < kSize; ++y) { + const baseOffset = kBytesPerRow * y; + for (let x = 0; x < kSize; ++x) { + const lastRed = kSize - y - 1; + for (let i = 0; i < lastRed; ++i) { + expectedData[baseOffset + i * 4] = 0; + expectedData[baseOffset + i * 4 + 1] = 255; + expectedData[baseOffset + i * 4 + 2] = 0; + expectedData[baseOffset + i * 4 + 3] = 255; + } + for (let j = lastRed; j < kSize; ++j) { + expectedData[baseOffset + j * 4] = 255; + expectedData[baseOffset + j * 4 + 1] = 0; + expectedData[baseOffset + j * 4 + 2] = 0; + expectedData[baseOffset + j * 4 + 3] = 255; + } + } + } + t.expectGPUBufferValuesEqual(outputBuffer, expectedData); + }); diff --git a/src/webgpu/shader/execution/statement/phony.spec.ts b/src/webgpu/shader/execution/statement/phony.spec.ts index 1f28d040f2d8..309d8848523d 100644 --- a/src/webgpu/shader/execution/statement/phony.spec.ts +++ b/src/webgpu/shader/execution/statement/phony.spec.ts @@ -88,6 +88,10 @@ const kTests = { src: `_ = put(42i);`, values: [42, 0], }, + call_in_subexpr: { + src: `_ = put(42i) + 1;`, + values: [42, 0], + }, nested_call: { src: `_ = put(put(42)+1);`, values: [42, 43, 0], diff --git 
a/src/webgpu/shader/validation/decl/var.spec.ts b/src/webgpu/shader/validation/decl/var.spec.ts index f9e15bd6e2a6..1abf8bcf4fdb 100644 --- a/src/webgpu/shader/validation/decl/var.spec.ts +++ b/src/webgpu/shader/validation/decl/var.spec.ts @@ -749,7 +749,8 @@ g.test('var_access_mode_bad_other_template_contents') .fn(t => { const prog = `@group(0) @binding(0) var<${t.params.prefix}${t.params.accessMode}${t.params.suffix}> x: i32;`; - const ok = t.params.prefix === 'storage,' && t.params.suffix === ''; + const ok = + t.params.prefix === 'storage,' && (t.params.suffix === '' || t.params.suffix === ','); t.expectCompileResult(ok, prog); }); diff --git a/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts new file mode 100644 index 000000000000..30f521e54944 --- /dev/null +++ b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts @@ -0,0 +1,264 @@ +export const description = ` +Validation tests for short-circuiting && and || expressions. +`; + +import { makeTestGroup } from '../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../common/util/data_tables.js'; +import { + kAllScalarsAndVectors, + ScalarType, + scalarTypeOf, + Type, +} from '../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +// A list of scalar and vector types. +const kScalarAndVectorTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('scalar_vector') + .desc( + ` + Validates that scalar and vector short-circuiting operators are only accepted for scalar booleans. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('lhs', keysOf(kScalarAndVectorTypes)) + .combine( + 'rhs', + // Skip vec3 and vec4 on the RHS to keep the number of subcases down. + keysOf(kScalarAndVectorTypes).filter( + value => !(value.startsWith('vec3') || value.startsWith('vec4')) + ) + ) + .beginSubcases() + ) + .beforeAllSubcases(t => { + if ( + scalarTypeOf(kScalarAndVectorTypes[t.params.lhs]) === Type.f16 || + scalarTypeOf(kScalarAndVectorTypes[t.params.rhs]) === Type.f16 + ) { + t.selectDeviceOrSkipTestCase('shader-f16'); + } + }) + .fn(t => { + const lhs = kScalarAndVectorTypes[t.params.lhs]; + const rhs = kScalarAndVectorTypes[t.params.rhs]; + const lhsElement = scalarTypeOf(lhs); + const rhsElement = scalarTypeOf(rhs); + const hasF16 = lhsElement === Type.f16 || rhsElement === Type.f16; + const code = ` +${hasF16 ? 'enable f16;' : ''} +const lhs = ${lhs.create(0).wgsl()}; +const rhs = ${rhs.create(0).wgsl()}; +const foo = lhs ${t.params.op} rhs; +`; + + // Determine if the types are compatible. + let valid = false; + if (lhs instanceof ScalarType && rhs instanceof ScalarType) { + valid = lhsElement === Type.bool && rhsElement === Type.bool; + } + + t.expectCompileResult(valid, code); + }); + +interface InvalidTypeConfig { + // An expression that produces a value of the target type. + expr: string; + // A function that converts an expression of the target type into a valid boolean operand. 
+ control: (x: string) => string; +} +const kInvalidTypes: Record = { + mat2x2f: { + expr: 'm', + control: e => `bool(${e}[0][0])`, + }, + + array: { + expr: 'arr', + control: e => `${e}[0]`, + }, + + ptr: { + expr: '(&b)', + control: e => `*${e}`, + }, + + atomic: { + expr: 'a', + control: e => `bool(atomicLoad(&${e}))`, + }, + + texture: { + expr: 't', + control: e => `bool(textureLoad(${e}, vec2(), 0).x)`, + }, + + sampler: { + expr: 's', + control: e => `bool(textureSampleLevel(t, ${e}, vec2(), 0).x)`, + }, + + struct: { + expr: 'str', + control: e => `${e}.b`, + }, +}; + +g.test('invalid_types') + .desc( + ` + Validates that short-circuiting expressions are never accepted for non-scalar and non-vector types. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('type', keysOf(kInvalidTypes)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const type = kInvalidTypes[t.params.type]; + const expr = t.params.control ? type.control(type.expr) : type.expr; + const code = ` +@group(0) @binding(0) var t : texture_2d; +@group(0) @binding(1) var s : sampler; +@group(0) @binding(2) var a : atomic; + +struct S { b : bool } + +var b : bool; +var m : mat2x2f; +var arr : array; +var str : S; + +@compute @workgroup_size(1) +fn main() { + let foo = ${expr} ${t.params.op} ${expr}; +} +`; + + t.expectCompileResult(t.params.control, code); + }); + +// A map from operator to the value of the LHS that will cause short-circuiting. +const kLhsForShortCircuit: Record = { + '&&': false, + '||': true, +}; + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. +const kInvalidRhsExpressions: Record = { + overflow: 'i32(1< + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +const thirty_one = 31u; +const zero_i32 = 0i; +const one_f32 = 1.0f; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +} +`; + + t.expectCompileResult(t.params.short_circuit, code); + }); + +g.test('invalid_rhs_override') + .desc( + ` + Validates that a short-circuiting expression with an override-expression LHS guards the evaluation of its RHS expression. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +override cond : bool; +override zero_i32 = 0i; +override one_f32 = 1.0f; +override thirty_one = 31u; +override foo = cond ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +`; + + const constants: Record = {}; + constants['cond'] = lhs ? 1 : 0; + t.expectPipelineResult({ + expectedResult: t.params.short_circuit, + code, + constants, + reference: ['foo'], + }); + }); + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. +// The control case will use `value = 10`, the failure case will use `value = 1`. 
+const kInvalidArrayCounts: Record = { + negative: 'value - 2', + sqrt_neg1: 'u32(sqrt(value - 2))', + nested: '10 + array()[0]', +}; + +g.test('invalid_array_count_on_rhs') + .desc( + ` + Validates that an invalid array count expression is not guarded by a short-circuiting expression. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidArrayCounts)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const lhs = t.params.op === '&&' ? 'false' : 'true'; + const code = ` +const value = ${t.params.control ? '10' : '1'}; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} array()[0]; +} +`; + + t.expectCompileResult(t.params.control, code); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts index 1ac752a3bfa9..ff0114097f90 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts @@ -127,6 +127,8 @@ Validates that low <= high. const scalar = scalarTypeOf(ty); return scalar !== Type.abstractInt && scalar !== Type.abstractFloat; }) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValuesTypes[t.params.type]; @@ -176,7 +178,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -187,6 +192,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts index 80fe7ccaca5e..32abc477ee8f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts @@ -98,6 +98,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -138,7 +140,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -149,6 +154,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts index 57644ad36fb4..b302bfd14677 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts @@ -119,6 +119,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -160,7 +162,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -171,6 +176,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts index 826354d1ff08..55a702d71f0a 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts @@ -143,6 +143,8 @@ g.test('partial_values') cases.push({ value: bias + 2 }); return cases; }) + // in_shader: Is the functino call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValidArgumentTypesA[t.params.typeA]; @@ -179,7 +181,7 @@ fn foo() { const bias = biasForType(scalarTypeOf(tyA)); const error = t.params.value > bias + 1; const shader_error = error && t.params.stage === 'constant'; - const pipeline_error = error && t.params.stage === 'override'; + const pipeline_error = t.params.in_shader && error && t.params.stage === 'override'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -189,6 +191,7 @@ fn foo() { code: wgsl, constants, reference: ['o_b'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts index 28e1d9cdc61b..bed18020632d 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts @@ -12,7 +12,13 @@ import { scalarTypeOf, ScalarType, } from '../../../../../util/conversion.js'; -import { QuantizeFunc, quantizeToF16, quantizeToF32 } from '../../../../../util/math.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -37,6 +43,17 @@ function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { } } +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -73,6 +90,11 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec expectedResult = false; } + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). + const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + t.skipIf(isSubnormalFn(vv) || isSubnormalFn(dp) || isSubnormalFn(len)); + validateConstOrOverrideBuiltinEval( t, builtin, diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts new file mode 100644 index 000000000000..6988f17b9ede --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts @@ -0,0 +1,286 @@ +export const description = ` +Validation tests for quadBroadcast +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + isConvertible, + Type, + elementTypeOf, + kAllScalarsAndVectors, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = quadBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = quadBroadcast(0, 0); +}`, + override: ` +enable subgroups; +override o = quadBroadcast(0, 0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = quadBroadcast(0, 0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(${type.create(0).wgsl()}, 0); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = quadBroadcast(${dataType.create(0).wgsl()}, 0); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType 
=== retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('id_type') + .desc('Validates id parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + const wgsl = ` +enable subgroups; +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(0, ${type.create(0).wgsl()}); +}`; + + const expect = isConvertible(type, Type.u32) || isConvertible(type, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +override override_decl : u32; +var var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = quadBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = quadBroadcast(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts new file mode 100644 index 000000000000..3812ba057ed6 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for quadSwapX, quadSwapY, and quadSwapDiagonal. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ?
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts index 51cf9553785c..387340f80e9f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts @@ -2,10 +2,21 @@ const builtin = 'refract'; export const description = ` Validation tests for the ${builtin}() builtin. `; - import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; -import { Type, kConvertableToFloatVectors, scalarTypeOf } from '../../../../../util/conversion.js'; +import { + Type, + kConvertableToFloatVectors, + scalarTypeOf, + ScalarType, +} from '../../../../../util/conversion.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -20,6 +31,28 @@ export const g = makeTestGroup(ShaderValidationTest); const kValidArgumentTypes = objectsToRecord(kConvertableToFloatVectors); +function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { + switch (type) { + case Type.f32: + return quantizeToF32; + case Type.f16: + return quantizeToF16; + default: + return (v: number) => v; + } +} + +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -64,6 +97,17 @@ where a the calculations result in a non-representable value for the given type. const c2_one_minus_b_dot_a_2 = vCheck.checkedResult(c2 * one_minus_b_dot_a_2); const k = vCheck.checkedResult(1.0 - c2_one_minus_b_dot_a_2); + const quantizeFn = quantizeFunctionForScalarType(scalarType); + const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). 
+ t.skipIf( + isSubnormalFn(quantizeFn(b_dot_a)) || + isSubnormalFn(quantizeFn(b_dot_a_2)) || + isSubnormalFn(quantizeFn(c2)) || + isSubnormalFn(quantizeFn(k)) + ); + if (k >= 0) { // If the k is near zero it may fail on some implementations which implement sqrt as // 1/inversesqrt, so skip the test. diff --git a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts index 5a5a28fc7362..2879055ab216 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts @@ -51,16 +51,15 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec .fn(t => { const type = kValuesTypes[t.params.type]; - // We expect to fail if low >= high as it results in a DBZ - const expectedResult = t.params.value1 >= t.params.value2; + // We expect to fail if low >= high. + const expectedResult = t.params.value1 < t.params.value2; validateConstOrOverrideBuiltinEval( t, builtin, expectedResult, [type.create(t.params.value1), type.create(t.params.value2), type.create(0)], - t.params.stage, - /* returnType */ concreteTypeOf(type, [Type.f32]) + t.params.stage ); }); @@ -81,6 +80,8 @@ g.test('partial_eval_errors') .beginSubcases() .expand('low', u => [0, 10]) .expand('high', u => [0, 10]) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { if (scalarTypeOf(kValuesTypes[t.params.type]) === Type.f16) { @@ -130,7 +131,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -141,6 +145,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); @@ -159,10 +164,11 @@ Validates that scalar and vector arguments are rejected by ${builtin}() if not f }) .fn(t => { const type = kArgumentTypes[t.params.type]; + const expectedResult = isConvertibleToFloatType(elementTypeOf(type)); validateConstOrOverrideBuiltinEval( t, builtin, - /* expectedResult */ isConvertibleToFloatType(elementTypeOf(type)), + expectedResult, [type.create(0), type.create(1), type.create(2)], 'constant', /* returnType */ concreteTypeOf(type, [Type.f32]) @@ -344,7 +350,7 @@ g.test('early_eval_errors') t, builtin, /* expectedResult */ t.params.low < t.params.high, - [f32(0), f32(t.params.low), f32(t.params.high)], + [f32(t.params.low), f32(t.params.high), f32(0)], t.params.stage ); }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..4f2a2af52197 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupAdd and subgroupExclusiveAdd +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +const kStages: Record string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + 
+g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + b : u32, +} + +var<private> priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts new file mode 100644 index 000000000000..eaee33e62cff --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts @@ -0,0 +1,186 @@ +export const description = ` +Validation tests for subgroupAny and subgroupAll. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAny', 'subgroupAll'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(true); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = select(0, 1, ${op}(true));`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(true); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ?
'_ = ' : ''}${t.params.op}(false); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('type', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = ${t.params.op}(true); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(true); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts index afbe33e93c56..5f53847be25c 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts @@ -9,6 +9,22 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kStages: Record = { constant: ` enable subgroups; @@ -38,6 +54,23 @@ g.test('early_eval') t.expectCompileResult(t.params.stage === 'runtime', code); }); +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); g.test('data_type') @@ -69,7 +102,7 @@ fn main() { }); g.test('return_type') - .desc('Validates data parameter type') + .desc('Validates return type') .params(u => u.combine('type', keysOf(kArgumentTypes)).filter(t => { const type = kArgumentTypes[t.type]; diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts new file mode 100644 index 000000000000..ca0dfb6fd719 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts @@ -0,0 +1,204 @@ +export const description = ` +Validation tests for subgroupAnd, subgroupOr, and subgroupXor. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + const expect = isConvertible(eleType, Type.u32) || isConvertible(eleType, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type 
before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && (eleRetType === Type.i32 || eleRetType === Type.u32); + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts index a71b145092c8..fd76cd419b7f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts @@ -14,6 +14,44 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record = { @@ -156,6 +194,58 @@ fn main() { t.expectCompileResult(expect, wgsl); }); +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +override override_decl : u32; +var var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = subgroupBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + g.test('stage') .desc('Validates it is only usable in correct stage') .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts new file mode 100644 index 000000000000..4525b6b97ef8 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts @@ -0,0 +1,210 @@ +export const description = ` +Validation tests for subgroupBroadcastFirst +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupBroadcastFirst(0); +}`, + override: ` +enable subgroups; +override o = subgroupBroadcastFirst(0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupBroadcastFirst(0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupBroadcastFirst(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = subgroupBroadcastFirst(${dataType.create(0).wgsl()}); +}`; + + const expect = 
elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupBroadcastFirst(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts new file mode 100644 index 000000000000..5637860c59ce --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts @@ -0,0 +1,175 @@ +export const description = ` +Validation tests for subgroupElect. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupElect(); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupElect(); +}`, + override: ` +enable subgroups +override o = select(0, 1, subgroupElect());`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupElect(); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}subgroupElect(); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates there are no valid data parameters') + .params(u => u.combine('type', keysOf(kTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupElect(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(false, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u.combine('type', keysOf(kTypes)).filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = subgroupElect(); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupElect(); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts new file mode 100644 index 000000000000..84c1860019ee --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for subgroupMin and subgroupMax. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupMin', 'subgroupMax'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..0b50d4c9df2d --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +const kStages: Record string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + 
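+// The *Mul builtins accept only numeric scalar and vector data, so each of
+// these composite, pointer, or builtin-result-struct arguments is expected
+// to fail compilation.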
+g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + b : u32, +} + +var priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts new file mode 100644 index 000000000000..62ffb5af36dd --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts @@ -0,0 +1,262 @@ +export const description = ` +Validation tests for subgroupShuffle, subgroupShuffleXor, subgroupShuffleUp, and subgroupShuffleDown. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = [ + 'subgroupShuffle', + 'subgroupShuffleXor', + 'subgroupShuffleUp', + 'subgroupShuffleDown', +] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0, 0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0, 0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0, 0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}, 0); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}, 0); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. 
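+    // For example, an abstract-float argument concretizes the call to f32, so
+    // declaring the result as f16 must fail even though abstract-float on its
+    // own is convertible to f16.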
+ const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('param2_type') + .desc('Validates shuffle parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(0, ${type.create(0).wgsl()}); +}`; + + const expect = + isConvertible(type, Type.u32) || (type === Type.i32 && t.params.op === 'subgroupShuffle'); + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts index 85bed5228482..d162ba3286b9 100644 --- a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts @@ -275,7 +275,7 @@ g.test('underflow_f16') let rhs = `mat${t.params.c}x${t.params.r}h(`; for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { - lhs += `${kValue.f32.negative.min / 2},`; + lhs += `${kValue.f16.negative.min / 2},`; rhs += `${t.params.rhs},`; } } diff --git a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts index e76e40265e09..a3a5d368dc2d 100644 --- a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts @@ -631,7 +631,7 @@ g.test('overflow_mat_f16_internal') for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { lhs += `${t.params.lhs},`; - rhs += `1`; + rhs += `1,`; } } rhs += ')'; diff --git a/src/webgpu/shader/validation/extension/clip_distances.spec.ts b/src/webgpu/shader/validation/extension/clip_distances.spec.ts new file mode 100644 index 000000000000..88957d8e8e62 --- /dev/null +++ b/src/webgpu/shader/validation/extension/clip_distances.spec.ts @@ -0,0 +1,43 @@ +export const description = ` +Validation tests for the 
clip_distances extension +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { ShaderValidationTest } from '../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('use_clip_distances_requires_extension_enabled') + .desc( + `Checks that the clip_distances built-in variable is only allowed with the WGSL extension + clip_distances enabled in shader and the WebGPU extension clip-distances supported on the + device.` + ) + .params(u => + u.combine('requireExtension', [true, false]).combine('enableExtension', [true, false]) + ) + .beforeAllSubcases(t => { + if (t.params.requireExtension) { + t.selectDeviceOrSkipTestCase({ requiredFeatures: ['clip-distances'] }); + } + }) + .fn(t => { + const { requireExtension, enableExtension } = t.params; + + t.expectCompileResult( + requireExtension && enableExtension, + ` + ${enableExtension ? 'enable clip_distances;' : ''} + struct VertexOut { + @builtin(clip_distances) my_clip_distances : array, + @builtin(position) my_position : vec4f, + } + @vertex fn main() -> VertexOut { + var output : VertexOut; + output.my_clip_distances[0] = 1.0; + output.my_position = vec4f(0.0, 0.0, 0.0, 1.0); + return output; + } + ` + ); + }); diff --git a/src/webgpu/shader/validation/parse/identifiers.spec.ts b/src/webgpu/shader/validation/parse/identifiers.spec.ts index 0dd429d0a72c..4a7ec70120ff 100644 --- a/src/webgpu/shader/validation/parse/identifiers.spec.ts +++ b/src/webgpu/shader/validation/parse/identifiers.spec.ts @@ -199,6 +199,8 @@ const kInvalidIdentifiers = new Set([ 'noexcept', 'noinline', 'nointerpolation', + 'non_coherent', + 'noncoherent', 'noperspective', 'null', 'nullptr', diff --git a/src/webgpu/shader/validation/shader_io/builtins.spec.ts b/src/webgpu/shader/validation/shader_io/builtins.spec.ts index 85a30fa0ec60..3d01f8f23a3e 100644 --- a/src/webgpu/shader/validation/shader_io/builtins.spec.ts +++ b/src/webgpu/shader/validation/shader_io/builtins.spec.ts @@ -10,7 +10,7 @@ export const g = makeTestGroup(ShaderValidationTest); // List of all built-in variables and their stage, in|out usage, and type. // Taken from table in Section 15: -// https://www.w3.org/TR/2021/WD-WGSL-20211013/#builtin-variables +// https://www.w3.org/TR/WGSL/#builtin-inputs-outputs export const kBuiltins = [ { name: 'vertex_index', stage: 'vertex', io: 'in', type: 'u32' }, { name: 'instance_index', stage: 'vertex', io: 'in', type: 'u32' }, @@ -30,6 +30,14 @@ export const kBuiltins = [ { name: 'subgroup_size', stage: 'compute', io: 'in', type: 'u32' }, { name: 'subgroup_invocation_id', stage: 'fragment', io: 'in', type: 'u32' }, { name: 'subgroup_size', stage: 'fragment', io: 'in', type: 'u32' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, ] as const; // List of types to test against. 
@@ -64,7 +72,15 @@ const kTestTypes = [ 'array', 'array', 'array', + 'array', + 'array', + 'array', 'array', + 'array', + 'array', + 'array', + 'array', + 'array', 'MyStruct', ] as const; @@ -87,7 +103,16 @@ g.test('stage_inout') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.target_stage === 'vertex' && + t.params.target_io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output when the vertex output is not a struct' + ); }) .fn(t => { const code = generateShader({ @@ -117,9 +142,9 @@ g.test('type') .params(u => u .combineWithParams(kBuiltins) + .combine('use_struct', [true, false] as const) .beginSubcases() .combine('target_type', kTestTypes) - .combine('use_struct', [true, false] as const) ) .beforeAllSubcases(t => { t.skipIf( @@ -128,7 +153,16 @@ g.test('type') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.stage === 'vertex' && + t.params.io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output' + ); }) .fn(t => { let code = ''; @@ -297,14 +331,30 @@ g.test('reuse_builtin_name') u .combineWithParams(kBuiltins) .combine('use', ['alias', 'struct', 'function', 'module-var', 'function-var']) + .combine('enable_extension', [true, false]) + .unless( + t => t.enable_extension && !(t.name.includes('subgroup') || t.name === 'clip_distances') + ) ) .beforeAllSubcases(t => { + if (!t.params.enable_extension) { + return; + } if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } }) .fn(t => { let code = ''; + if (t.params.enable_extension) { + if (t.params.name.includes('subgroups')) { + code += 'enable subgroup;\n'; + } else if (t.params.name === 'clip_distances') { + code += 'enable clip_distances;\n'; + } + } if (t.params.use === 'alias') { code += `alias ${t.params.name} = i32;`; } else if (t.params.use === `struct`) { diff --git a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts index 933093e16f0f..b716093144b0 100644 --- a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts +++ b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts @@ -9,15 +9,11 @@ import { generateShader } from './util.js'; export const g = makeTestGroup(ShaderValidationTest); // List of valid interpolation attributes. -const kValidCompatInterpolationAttributes = new Set([ +const kValidInterpolationAttributes = new Set([ '', - '@interpolate(flat, either)', '@interpolate(perspective)', '@interpolate(perspective, center)', '@interpolate(perspective, centroid)', -]); -const kValidInterpolationAttributes = new Set([ - ...kValidCompatInterpolationAttributes, '@interpolate(flat)', '@interpolate(flat, first)', '@interpolate(flat, either)', @@ -83,10 +79,7 @@ g.test('type_and_sampling') io: t.params.io, use_struct: t.params.use_struct, }); - const validInterpolationAttributes = t.isCompatibility - ? 
kValidCompatInterpolationAttributes - : kValidInterpolationAttributes; - t.expectCompileResult(validInterpolationAttributes.has(interpolate), code); + t.expectCompileResult(kValidInterpolationAttributes.has(interpolate), code); }); g.test('require_location') @@ -140,9 +133,7 @@ g.test('integral_types') use_struct: t.params.use_struct, }); - const expectSuccess = t.isCompatibility - ? t.params.attribute === '@interpolate(flat, either)' - : t.params.attribute.startsWith('@interpolate(flat'); + const expectSuccess = t.params.attribute.startsWith('@interpolate(flat'); t.expectCompileResult(expectSuccess, code); }); @@ -160,7 +151,7 @@ g.test('duplicate') t.expectCompileResult(t.params.attr === '', code); }); -const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPass?: boolean } } = { +const kValidationTests: { [key: string]: { src: string; pass: boolean } } = { valid: { src: `@interpolate(perspective)`, pass: true, @@ -172,7 +163,6 @@ const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPas trailing_comma_one_arg: { src: `@interpolate(flat,)`, pass: true, - compatPass: false, }, trailing_comma_two_arg: { src: `@interpolate(perspective, center,)`, @@ -230,9 +220,6 @@ g.test('interpolation_validation') @builtin(position) vec4 { return vec4f(0); }`; - const expectSuccess = - kValidationTests[t.params.attr].pass && - (t.isCompatibility ? kValidationTests[t.params.attr].compatPass ?? true : true); - + const expectSuccess = kValidationTests[t.params.attr].pass; t.expectCompileResult(expectSuccess, code); }); diff --git a/src/webgpu/shader/validation/shader_io/util.ts b/src/webgpu/shader/validation/shader_io/util.ts index d115d79328b4..b71fd2aab42a 100644 --- a/src/webgpu/shader/validation/shader_io/util.ts +++ b/src/webgpu/shader/validation/shader_io/util.ts @@ -27,6 +27,9 @@ export function generateShader({ if (attribute.includes('subgroup')) { code += 'enable subgroups;\n'; } + if (attribute.includes('clip_distances')) { + code += 'enable clip_distances;\n'; + } if (use_struct) { // Generate a struct that wraps the entry point IO variable. diff --git a/src/webgpu/shader/validation/shader_validation_test.ts b/src/webgpu/shader/validation/shader_validation_test.ts index 6a4cae331766..5db47bd586ba 100644 --- a/src/webgpu/shader/validation/shader_validation_test.ts +++ b/src/webgpu/shader/validation/shader_validation_test.ts @@ -119,9 +119,14 @@ export class ShaderValidationTest extends GPUTest { constants?: Record; // List of additional module-scope variable the entrypoint needs to reference reference?: string[]; + // List of additional statements to insert in the entry point. 
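+      // Each entry is emitted verbatim as a statement in the generated entry
+      // point body, e.g. `'_ = 1;'`.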
+ statements?: string[]; }) { const phonies: Array = []; + if (args.statements !== undefined) { + phonies.push(...args.statements); + } if (args.constants !== undefined) { phonies.push(...keysOf(args.constants).map(c => `_ = ${c};`)); } diff --git a/src/webgpu/shader/validation/types/textures.spec.ts b/src/webgpu/shader/validation/types/textures.spec.ts index 7b8f1748c113..f619877e2bc1 100644 --- a/src/webgpu/shader/validation/types/textures.spec.ts +++ b/src/webgpu/shader/validation/types/textures.spec.ts @@ -120,7 +120,7 @@ Besides, the shader compilation should always pass regardless of whether the for const { format, access, comma } = t.params; // bgra8unorm is considered a valid storage format at shader compilation stage const isFormatValid = - isTextureFormatUsableAsStorageFormat(format, t.isCompatibility) || format === 'bgra8unorm'; + isTextureFormatUsableAsStorageFormat(format, false) || format === 'bgra8unorm'; const isAccessValid = kAccessModes.includes(access); const wgsl = `@group(0) @binding(0) var tex: texture_storage_2d<${format}, ${access}${comma}>;`; t.expectCompileResult(isFormatValid && isAccessValid, wgsl); diff --git a/src/webgpu/util/math.ts b/src/webgpu/util/math.ts index 20d7818df65d..d5ca2b41320e 100644 --- a/src/webgpu/util/math.ts +++ b/src/webgpu/util/math.ts @@ -961,6 +961,17 @@ export function scalarF32Range( counts.neg_norm = counts.neg_norm === undefined ? counts.pos_norm : counts.neg_norm; counts.neg_sub = counts.neg_sub === undefined ? counts.pos_sub : counts.neg_sub; + let special_pos: number[] = []; + // The first interior point for 'pos_norm' is at 3. Because we have two special values we start allowing these + // special values as soon as they will fit as interior values. + if (counts.pos_norm >= 4) { + special_pos = [ + // Largest float as signed integer + 0x4effffff, + // Largest float as unsigned integer + 0x4f7fffff, + ]; + } // Generating bit fields first and then converting to f32, so that the spread across the possible f32 values is more // even. Generating against the bounds of f32 values directly results in the values being extremely biased towards the // extremes, since they are so much larger. 
@@ -980,7 +991,14 @@ export function scalarF32Range( kBit.f32.positive.subnormal.max, counts.pos_sub ), - ...linearRange(kBit.f32.positive.min, kBit.f32.positive.max, counts.pos_norm), + ...[ + ...linearRange( + kBit.f32.positive.min, + kBit.f32.positive.max, + counts.pos_norm - special_pos.length + ), + ...special_pos, + ].sort((n1, n2) => n1 - n2), ].map(Math.trunc); return bit_fields.map(reinterpretU32AsF32); } diff --git a/src/webgpu/util/texture.ts b/src/webgpu/util/texture.ts index badce71baa34..20e99fdfad4d 100644 --- a/src/webgpu/util/texture.ts +++ b/src/webgpu/util/texture.ts @@ -17,6 +17,7 @@ const kLoadValueFromStorageInfo: Partial<{ texelType: string; unpackWGSL: string; useFragDepth?: boolean; + discardWithStencil?: boolean; }; }> = { r8unorm: { @@ -233,17 +234,27 @@ const kLoadValueFromStorageInfo: Partial<{ `, useFragDepth: true, }, + stencil8: { + storageType: 'u32', + texelType: 'vec4u', + unpackWGSL: ` + return vec4u(unpack4xU8(src[byteOffset / 4])[byteOffset % 4], 123, 123, 123) + `, + discardWithStencil: true, + }, }; function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { const info = kLoadValueFromStorageInfo[format]; assert(!!info); - const { storageType, texelType, unpackWGSL, useFragDepth } = info; + const { storageType, texelType, unpackWGSL, useFragDepth, discardWithStencil } = info; const [depthDecl, depthCode] = useFragDepth ? ['@builtin(frag_depth) d: f32,', 'fs.d = fs.v[0];'] : ['', '']; + const stencilCode = discardWithStencil ? 'if ((fs.v.r & vin.stencilMask) == 0) { discard; }' : ''; + return ` struct Uniforms { numTexelRows: u32, @@ -255,9 +266,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { struct VSOutput { @builtin(position) pos: vec4f, @location(0) @interpolate(flat, either) sampleIndex: u32, + @location(1) @interpolate(flat, either) stencilMask: u32, }; - @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> VSOutput { + @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { let points = array( vec2f(0, 0), vec2f(1, 0), vec2f(0, 1), vec2f(1, 1), ); @@ -266,7 +278,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { let rowOffset = f32(sampleRow) / numSampleRows; let rowMult = 1.0 / numSampleRows; let p = (points[vNdx % 4] * vec2f(1, rowMult) + vec2f(0, rowOffset)) * 2.0 - 1.0; - return VSOutput(vec4f(p, 0, 1), uni.sampleCount - sampleRow % uni.sampleCount - 1); + return VSOutput( + vec4f(p, 0, 1), + uni.sampleCount - sampleRow % uni.sampleCount - 1, + 1u << iNdx); } @group(0) @binding(0) var uni: Uniforms; @@ -289,6 +304,7 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { var fs: FSOutput; fs.v = unpack(byteOffset); ${depthCode} + ${stencilCode} return fs; } `; @@ -312,114 +328,158 @@ function copyBufferToTextureViaRender( const msInfo = kLoadValueFromStorageInfo[format]; assert(!!msInfo); - const { useFragDepth } = msInfo; + const { useFragDepth, discardWithStencil } = msInfo; const { device } = t; - const code = getCopyBufferToTextureViaRenderCode(format); - const id = JSON.stringify({ format, useFragDepth, sampleCount, code }); - const pipelines = - s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); - s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); - let pipeline = pipelines.get(id); - if (!pipeline) { - const module = device.createShaderModule({ code }); - pipeline = device.createRenderPipeline({ - layout: 'auto', - vertex: { module }, - ...(useFragDepth - ? 
{ - fragment: { - module, - targets: [], - }, - depthStencil: { - depthWriteEnabled: true, - depthCompare: 'always', - format, - }, - } - : { - fragment: { - module, - targets: [{ format }], - }, - }), - primitive: { - topology: 'triangle-strip', - }, - ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + const numBlits = discardWithStencil ? 8 : 1; + for (let blitCount = 0; blitCount < numBlits; ++blitCount) { + const code = getCopyBufferToTextureViaRenderCode(format); + const stencilWriteMask = 1 << blitCount; + const id = JSON.stringify({ + format, + useFragDepth, + stencilWriteMask, + discardWithStencil, + sampleCount, + code, }); - pipelines.set(id, pipeline); - } + const pipelines = + s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); + s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); + let pipeline = pipelines.get(id); + if (!pipeline) { + const module = device.createShaderModule({ code }); + pipeline = device.createRenderPipeline({ + label: `blitCopyFor-${format}`, + layout: 'auto', + vertex: { module }, + ...(discardWithStencil + ? { + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: false, + depthCompare: 'always', + format, + stencilWriteMask, + stencilFront: { + passOp: 'replace', + }, + }, + } + : useFragDepth + ? { + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: true, + depthCompare: 'always', + format, + }, + } + : { + fragment: { + module, + targets: [{ format }], + }, + }), + primitive: { + topology: 'triangle-strip', + }, + ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + }); + pipelines.set(id, pipeline); + } - const info = kTextureFormatInfo[format]; - const uniforms = new Uint32Array([ - copySize.height, // numTexelRows: u32, - source.bytesPerRow!, // bytesPerRow: u32, - info.bytesPerBlock!, // bytesPerSample: u32, - dest.texture.sampleCount, // sampleCount: u32, - ]); - const uniformBuffer = t.makeBufferWithContents( - uniforms, - GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM - ); - const storageBuffer = t.createBufferTracked({ - size: source.buffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, - }); - encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); - const baseMipLevel = dest.mipLevel; - for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { - const baseArrayLayer = origin.z + l; - const mipLevelCount = 1; - const arrayLayerCount = 1; - const pass = encoder.beginRenderPass( - useFragDepth - ? 
{ - colorAttachments: [], - depthStencilAttachment: { - view: dest.texture.createView({ - baseMipLevel, - baseArrayLayer, - mipLevelCount, - arrayLayerCount, - }), - depthClearValue: 0, - depthLoadOp: 'clear', - depthStoreOp: 'store', - }, - } - : { - colorAttachments: [ - { + const info = kTextureFormatInfo[format]; + const uniforms = new Uint32Array([ + copySize.height, // numTexelRows: u32, + source.bytesPerRow!, // bytesPerRow: u32, + info.bytesPerBlock!, // bytesPerSample: u32, + dest.texture.sampleCount, // sampleCount: u32, + ]); + const uniformBuffer = t.makeBufferWithContents( + uniforms, + GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM + ); + const storageBuffer = t.createBufferTracked({ + size: source.buffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, + }); + encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); + const baseMipLevel = dest.mipLevel; + for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { + const baseArrayLayer = origin.z + l; + const mipLevelCount = 1; + const arrayLayerCount = 1; + const pass = encoder.beginRenderPass( + discardWithStencil + ? { + colorAttachments: [], + depthStencilAttachment: { view: dest.texture.createView({ baseMipLevel, baseArrayLayer, mipLevelCount, arrayLayerCount, }), - loadOp: 'clear', - storeOp: 'store', + stencilClearValue: 0, + stencilLoadOp: 'load', + stencilStoreOp: 'store', }, - ], - } - ); - pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); - pass.setPipeline(pipeline); + } + : useFragDepth + ? { + colorAttachments: [], + depthStencilAttachment: { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + depthClearValue: 0, + depthLoadOp: 'clear', + depthStoreOp: 'store', + }, + } + : { + colorAttachments: [ + { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + loadOp: 'clear', + storeOp: 'store', + }, + ], + } + ); + pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); + pass.setPipeline(pipeline); - const offset = - (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: uniformBuffer } }, - { binding: 1, resource: { buffer: storageBuffer, offset } }, - ], - }); + const offset = + (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; + const bindGroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: uniformBuffer } }, + { binding: 1, resource: { buffer: storageBuffer, offset } }, + ], + }); - pass.setBindGroup(0, bindGroup); - pass.draw(4 * copySize.height * dest.texture.sampleCount); - pass.end(); + pass.setBindGroup(0, bindGroup); + pass.setStencilReference(0xff); + pass.draw(4 * copySize.height * dest.texture.sampleCount, 1, 0, blitCount); + pass.end(); + } } } diff --git a/src/webgpu/util/texture/base.ts b/src/webgpu/util/texture/base.ts index c5c6aaf20579..0bdcb141db2b 100644 --- a/src/webgpu/util/texture/base.ts +++ b/src/webgpu/util/texture/base.ts @@ -239,6 +239,7 @@ export function reifyTextureViewDescriptor( const format = view.format ?? texture.format; const mipLevelCount = view.mipLevelCount ?? texture.mipLevelCount - baseMipLevel; const dimension = view.dimension ?? defaultViewDimensionsForTexture(texture); + const usage = (view.usage ?? 
0) === 0 ? texture.usage : view.usage!; let arrayLayerCount = view.arrayLayerCount; if (arrayLayerCount === undefined) { @@ -255,6 +256,7 @@ export function reifyTextureViewDescriptor( format, dimension, aspect, + usage, baseMipLevel, mipLevelCount, baseArrayLayer, diff --git a/src/webgpu/web_platform/canvas/configure.spec.ts b/src/webgpu/web_platform/canvas/configure.spec.ts index 65b0bc1f9d7b..06e590751b33 100644 --- a/src/webgpu/web_platform/canvas/configure.spec.ts +++ b/src/webgpu/web_platform/canvas/configure.spec.ts @@ -3,7 +3,7 @@ Tests for GPUCanvasContext.configure. TODO: - Test colorSpace -- Test viewFormats +- Test toneMapping `; import { makeTestGroup } from '../../../common/framework/test_group.js'; @@ -42,6 +42,16 @@ g.test('defaults') format: 'rgba8unorm', }); + const configuration = ctx.getConfiguration(); + assert(configuration !== null); + t.expect(configuration.device === t.device); + t.expect(configuration.format === 'rgba8unorm'); + t.expect(configuration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(configuration.viewFormats.length === 0); + t.expect(configuration.colorSpace === 'srgb'); + t.expect(configuration.toneMapping.mode === 'standard'); + t.expect(configuration.alphaMode === 'opaque'); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture.format === 'rgba8unorm'); t.expect(currentTexture.usage === GPUTextureUsage.RENDER_ATTACHMENT); @@ -69,6 +79,9 @@ g.test('device') const ctx = canvas.getContext('webgpu'); assert(ctx instanceof GPUCanvasContext, 'Failed to get WebGPU context from canvas'); + // getConfiguration returns null before configure. + t.expect(ctx.getConfiguration() === null); + // Calling configure without a device should throw a TypeError. t.shouldThrow('TypeError', () => { ctx.configure({ @@ -85,8 +98,20 @@ g.test('device') ctx.configure({ device: t.device, format: 'rgba8unorm', + alphaMode: 'opaque', }); + // getConfiguration will succeed after configure. + const configuration = ctx.getConfiguration(); + assert(configuration !== null); + t.expect(configuration.device === t.device); + t.expect(configuration.format === 'rgba8unorm'); + t.expect(configuration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(configuration.viewFormats.length === 0); + t.expect(configuration.colorSpace === 'srgb'); + t.expect(configuration.toneMapping.mode === 'standard'); + t.expect(configuration.alphaMode === 'opaque'); + // getCurrentTexture will succeed with a valid device. ctx.getCurrentTexture(); @@ -96,12 +121,27 @@ g.test('device') ctx.getCurrentTexture(); }); + // getConfiguration returns null after unconfigure. + t.expect(ctx.getConfiguration() === null); + // Should be able to successfully configure again after unconfiguring. ctx.configure({ device: t.device, format: 'rgba8unorm', + alphaMode: 'premultiplied', }); ctx.getCurrentTexture(); + + // getConfiguration will succeed after configure. 
+ const newConfiguration = ctx.getConfiguration(); + assert(newConfiguration !== null); + t.expect(newConfiguration.device === t.device); + t.expect(newConfiguration.format === 'rgba8unorm'); + t.expect(newConfiguration.usage === GPUTextureUsage.RENDER_ATTACHMENT); + t.expect(newConfiguration.viewFormats.length === 0); + t.expect(newConfiguration.colorSpace === 'srgb'); + t.expect(newConfiguration.toneMapping.mode === 'standard'); + t.expect(newConfiguration.alphaMode === 'premultiplied'); }); g.test('format') @@ -133,18 +173,21 @@ g.test('format') } } - t.expectValidationError(() => { + if (validFormat) { ctx.configure({ device: t.device, format, }); - }, !validFormat); - - t.expectValidationError(() => { - // Should always return a texture, whether the configured format was valid or not. - const currentTexture = ctx.getCurrentTexture(); - t.expect(currentTexture instanceof GPUTexture); - }, !validFormat); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.format === format); + } else { + t.shouldThrow('TypeError', () => { + ctx.configure({ + device: t.device, + format, + }); + }); + } }); g.test('usage') @@ -179,6 +222,9 @@ g.test('usage') usage, }); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.usage === usage); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture instanceof GPUTexture); t.expect(currentTexture.usage === usage); @@ -289,6 +335,9 @@ g.test('alpha_mode') alphaMode, }); + const configuration = ctx.getConfiguration(); + t.expect(configuration!.alphaMode === alphaMode); + const currentTexture = ctx.getCurrentTexture(); t.expect(currentTexture instanceof GPUTexture); }); @@ -412,6 +461,9 @@ g.test('viewFormats') }); }, !compatible); + const viewFormats = ctx.getConfiguration()!.viewFormats; + t.expect(viewFormats[0] === viewFormat); + // Likewise for getCurrentTexture(). let currentTexture: GPUTexture; t.expectValidationError(() => { diff --git a/src/webgpu/web_platform/reftests/gpu_ref_test.ts b/src/webgpu/web_platform/reftests/gpu_ref_test.ts index 48161ac33e87..051fb52f0037 100644 --- a/src/webgpu/web_platform/reftests/gpu_ref_test.ts +++ b/src/webgpu/web_platform/reftests/gpu_ref_test.ts @@ -1,5 +1,5 @@ import { assert } from '../../../common/util/util.js'; -import { takeScreenshotDelayed } from '../../../common/util/wpt_reftest_wait.js'; +import { takeScreenshot, takeScreenshotDelayed } from '../../../common/util/wpt_reftest_wait.js'; interface GPURefTest { readonly device: GPUDevice; @@ -22,5 +22,8 @@ export function runRefTest(fn: (t: GPURefTest) => Promise | void): void { await fn({ device, queue }); takeScreenshotDelayed(50); - })(); + })().catch(() => { + // remove reftest-wait to mark end of test + takeScreenshot(); + }); } diff --git a/standalone/index.html b/standalone/index.html index d087d6584cd9..5c21c1033744 100644 --- a/standalone/index.html +++ b/standalone/index.html @@ -13,8 +13,10 @@ - - + + + +