mozilla · ErichDonGubler · Nov 1, 2023 · Aug 22, 2023 · Aug 22, 2023 · Aug 22, 2023
diff --git a/.eslintrc.json b/.eslintrc.json
@@ -52,7 +52,10 @@
     "@typescript-eslint/no-this-alias": "warn",
     "@typescript-eslint/no-unnecessary-type-assertion": "warn",
     "@typescript-eslint/no-unnecessary-type-constraint": "warn",
-    "@typescript-eslint/no-unused-vars": ["warn", { "vars": "all", "args": "none" }],
+    "@typescript-eslint/no-unused-vars": [
+      "warn",
+      { "vars": "all", "args": "none", "varsIgnorePattern": "^_" }
+    ],
     "@typescript-eslint/prefer-as-const": "warn",
     "@typescript-eslint/prefer-for-of": "warn",
     "@typescript-eslint/prefer-namespace-keyword": "warn",

diff --git a/Gruntfile.js b/Gruntfile.js
@@ -26,13 +26,21 @@ module.exports = function (grunt) {
         cmd: 'node',
         args: ['tools/validate', 'src/webgpu', 'src/stress', 'src/manual', 'src/unittests', 'src/demo'],
       },
+      'validate-cache': {
+        cmd: 'node',
+        args: ['tools/gen_cache', 'out', 'src/webgpu', '--validate'],
+      },
       'generate-wpt-cts-html': {
         cmd: 'node',
-        args: ['tools/gen_wpt_cts_html', 'out-wpt/cts.https.html', 'src/common/templates/cts.https.html'],
+        args: ['tools/gen_wpt_cts_html', 'tools/gen_wpt_cfg_unchunked.json'],
+      },
+      'generate-wpt-cts-html-chunked2sec': {
+        cmd: 'node',
+        args: ['tools/gen_wpt_cts_html', 'tools/gen_wpt_cfg_chunked2sec.json'],
       },
       'generate-cache': {
         cmd: 'node',
-        args: ['tools/gen_cache', 'out/data', 'src/webgpu'],
+        args: ['tools/gen_cache', 'out', 'src/webgpu'],
       },
       unittest: {
         cmd: 'node',
@@ -181,6 +189,7 @@ module.exports = function (grunt) {
     'copy:out-wpt-generated',
     'copy:out-wpt-htmlfiles',
     'run:generate-wpt-cts-html',
+    'run:generate-wpt-cts-html-chunked2sec',
   ]);
   grunt.registerTask('build-done-message', () => {
     process.stderr.write('\nBuild completed! Running checks/tests');
@@ -189,6 +198,7 @@ module.exports = function (grunt) {
   registerTaskAndAddToHelp('pre', 'Run all presubmit checks: standalone+wpt+typecheck+unittest+lint', [
     'clean',
     'run:validate',
+    'run:validate-cache',
     'build-standalone',
     'run:generate-listings',
     'build-wpt',

diff --git a/docs/adding_timing_metadata.md b/docs/adding_timing_metadata.md
@@ -0,0 +1,163 @@
+# Adding Timing Metadata
+
+## listing_meta.json files
+
+`listing_meta.json` files are SEMI AUTO-GENERATED.
+
+The raw data may be edited manually, to add entries or change timing values.
+
+The **list** of tests must stay up to date, so it can be used by external
+tools. This is verified by presubmit checks.
+
+The `subcaseMS` values are estimates. They can be set to 0 if for some reason
+you can't estimate the time (or there's an existing test with a long name and
+slow subcases that would result in query strings that are too long), but this
+will produce a non-fatal warning. Avoid creating new warnings whenever
+possible. Any existing warnings should be fixed (eventually).
+
+### Performance
+
+Note this data is typically captured by developers using higher-end
+computers, so typical test machines might execute more slowly. For this
+reason, the WPT chunking should be configured to generate chunks much shorter
+than 5 seconds (a typical default time limit in WPT test executors) so they
+should still execute in under 5 seconds on lower-end computers.
+
+## Problem
+
+When adding new tests to the CTS you may occasionally see an error like this
+when running `npm test` or `npm run standalone`:
+
+```
+ERROR: Tests missing from listing_meta.json. Please add the new tests (set subcaseMS to 0 if you cannot estimate it):
+  webgpu:shader,execution,expression,binary,af_matrix_addition:matrix:*
+
+/home/runner/work/cts/cts/src/common/util/util.ts:38
+    throw new Error(msg && (typeof msg === 'string' ? msg : msg()));
+          ^
+Error:
+    at assert (/home/runner/work/cts/cts/src/common/util/util.ts:38:11)
+    at crawl (/home/runner/work/cts/cts/src/common/tools/crawl.ts:155:11)
+Warning: non-zero exit code 1
+ Use --force to continue.
+
+Aborted due to warnings.
+```
+
+What this error message is trying to tell us is that there is no entry for
+`webgpu:shader,execution,expression,binary,af_matrix_addition:matrix:*` in
+`src/webgpu/listing_meta.json`.
+
+These entries are estimates for the amount of time that subcases take to run,
+and are used as inputs into the WPT tooling to attempt to portion out tests into
+approximately same-sized chunks.
+
+If a value has been defaulted to 0 by someone, you will see warnings like this:
+
+```
+...
+WARNING: subcaseMS≤0 found in listing_meta.json (allowed, but try to avoid):
+  webgpu:shader,execution,expression,binary,af_matrix_addition:matrix:*
+...
+```
+
+These messages should be resolved by adding appropriate entries to the JSON
+file.
+
+## Solution 1 (manual, best for simple tests)
+
+If you're developing new tests and need to update this file, it is sometimes
+easiest to do so manually. Run your tests under your usual development workflow
+and see how long they take. In the standalone web runner `npm start`, the total
+time for a test case is reported on the right-hand side when the case logs are
+expanded.
+
+Record the average time per *subcase* across all cases of the test (you may need
+to compute this) into the `listing_meta.json` file.
+
+## Solution 2 (semi-automated)
+
+There exists tooling in the CTS repo for generating appropriate estimates for
+these values, though it does require some manual intervention. The rest of this
+doc will be a walkthrough of running these tools.
+
+Timing data can be captured in bulk and "merged" into this file using
+the `merge_listing_times` tool. This is useful when a large number of tests
+change or otherwise a lot of tests need to be updated, but it also automates the
+manual steps above.
+
+The tool can also be used without any inputs to reformat `listing_meta.json`.
+Please read the help message of `merge_listing_times` for more information.
+
+### Placeholder Value
+
+If your development workflow requires a clean build, the first step is to add a
+placeholder entry for the test to `src/webgpu/listing_meta.json`, since there is
+a chicken-and-egg problem for updating these values.
+
+```
+  "webgpu:shader,execution,expression,binary,af_matrix_addition:matrix:*": { "subcaseMS": 0 },
+```
+
+(It should have a value of 0, since later tooling updates the value if the newer
+value is higher.)
+
+### Websocket Logger
+
+The first tool that needs to be run is `websocket-logger`, which receives data
+on a WebSocket channel to capture timing data when CTS is run. This
+should be run in a separate process/terminal, since it needs to stay running
+throughout the following steps.
+
+In the `tools/websocket-logger/` directory:
+
+```
+npm ci
+npm start
+```
+
+The output from this command will indicate where the results are being logged,
+which will be needed later. For example:
+
+```
+...
+Writing to wslog-2023-09-12T18-57-34.txt
+...
+```
+
+### Running CTS
+
+Now we need to run the specific cases in CTS that we need to time.
+This should be possible under any development workflow (as long as its runtime environment, like Node, supports WebSockets), but the most well-tested way is using the standalone web runner.
+
+This requires serving the CTS locally. In the project root:
+
+```
+npm run standalone
+npm start
+```
+
+Once this is started you can then direct a WebGPU-enabled browser to the
+specific CTS entry and run the tests, for example:
+
+```
+http://localhost:8080/standalone/?q=webgpu:shader,execution,expression,binary,af_matrix_addition:matrix:*
+```
+
+If the tests have a high variance in runtime, you can run them multiple times.
+The longest recorded time will be used.
+
+### Merging metadata
+
+The final step is to merge the new data that has been captured into the JSON
+file.
+
+This can be done using the following command:
+
+```
+tools/merge_listing_times webgpu -- tools/websocket-logger/wslog-2023-09-12T18-57-34.txt
+```
+
+where the text file is the result file from websocket-logger.
+
+Now you just need to commit the pending diff in your repo.
diff --git a/docs/fp_primer.md b/docs/fp_primer.md
@@ -69,7 +69,7 @@ reference, see
 [binary64 on Wikipedia](https://en.wikipedia.org/wiki/Double-precision_floating-point_format),
 [binary32 on Wikipedia](https://en.wikipedia.org/wiki/Single-precision_floating-point_format),
 and
-[binar16 on Wikipedia](https://en.wikipedia.org/wiki/Half-precision_floating-point_format).
+[binary16 on Wikipedia](https://en.wikipedia.org/wiki/Half-precision_floating-point_format).
 
 In the floating points formats described above, there are two possible zero
 values, one with all bits being 0, called positive zero, and one all the same
@@ -144,7 +144,7 @@ This concept of near-overflow vs far-overflow divides the real number line into
 | -∞ < `x` <= `-(2 ** (exp_max + 1))`           | must round to -∞                |
 | `-(2 ** (exp_max + 1))` < `x` <= min fp value | must round to -∞ or min value   |
 | min fp value < `x` < max fp value             | round as discussed below        |
-| min fp value <= `x` < `2 ** (exp_max + 1)`    | must round to max value or ∞    |
+| max fp value <= `x` < `2 ** (exp_max + 1)`    | must round to max value or ∞    |
 | `2 ** (exp_max + 1))` < `x`                   | implementations must round to ∞ |
 
 
@@ -184,7 +184,7 @@ operations.
 Operations, which can be thought of as mathematical functions, are mappings from
 a set of inputs to a set of outputs.
 
-Denoted `f(x, y) = X`, where f is a placeholder or the name of the operation,
+Denoted `f(x, y) = X`, where `f` is a placeholder or the name of the operation,
 lower case variables are the inputs to the function, and uppercase variables are
 the outputs of the function.
 
@@ -208,7 +208,7 @@ Some examples of different types of operations:
 `multiplication(x, y) = X`, which represents the WGSL expression `x * y`, takes
 in floating point values, `x` and `y`, and produces a floating point value `X`.
 
-`lessThen(x, y) = X`, which represents the WGSL expression `x < y`, again takes
+`lessThan(x, y) = X`, which represents the WGSL expression `x < y`, again takes
 in floating point values, but in this case returns a boolean value.
 
 `ldexp(x, y) = X`, which builds a floating point value, takes in a floating
@@ -406,9 +406,9 @@ In more precise terms:
 
   X = [min(f(x)), max(f(x))]
   X = [min(f([a, b])), max(f([a, b]))]
-  X = [f(m), f(M)]
+  X = [f(m), f(n)]
 ```
-where m and M are in `[a, b]`, `m <= M`, and produce the min and max results
+where `m` and `n` are in `[a, b]`, `m <= n`, and produce the min and max results
 for `f` on the interval, respectively.
 
 So how do we find the minima and maxima for our operation in the domain?
@@ -499,15 +499,15 @@ literally pages of expanded intervals.
 
   sin(π/2) => [sin(π/2) - 2 ** -11, sin(π/2) + 2 ** -11]
            => [0 - 2 ** -11, 0 + 2 ** -11]
-           => [-0.000488.., 0.000488...]
+           => [-0.000488…, 0.000488…]
   cos(π/2) => [cos(π/2) - 2 ** -11, cos(π/2) + 2 ** -11]
-           => [-0.500488, -0.499511...]
+           => [-0.500488…, -0.499511…]
 
   tan(π/2) => sin(π/2)/cos(π/2)
-           => [-0.000488.., 0.000488...]/[-0.500488..., -0.499511...]
-           => [min({-0.000488.../-0.500488..., -0.000488.../-0.499511..., ...}),
-               max(min({-0.000488.../-0.500488..., -0.000488.../-0.499511..., ...}) ]
-           => [0.000488.../-0.499511..., 0.000488.../0.499511...]
+           => [-0.000488…, 0.000488…]/[-0.500488…, -0.499511…]
+           => [min(-0.000488…/-0.500488…, -0.000488…/-0.499511…, 0.000488…/-0.500488…, 0.000488…/-0.499511…),
+               max(-0.000488…/-0.500488…, -0.000488…/-0.499511…, 0.000488…/-0.500488…, 0.000488…/-0.499511…)]
+           => [0.000488…/-0.499511…, -0.000488…/-0.499511…]
            => [-0.0009775171, 0.0009775171]
 ```
 
@@ -553,10 +553,10 @@ These are compile vs run time, and CPU vs GPU. Broadly speaking compile time
 execution happens on the host CPU, and run time evaluation occurs on a dedicated
 GPU.
 
-(SwiftShader technically breaks this by being a software emulation of a GPU that
-runs on the CPU, but conceptually one can think of SwiftShader has being a type
-of GPU in this context, since it has similar constraints when it comes to
-precision, etc.)
+(Software graphics implementations like WARP and SwiftShader technically break this by
+being software emulations of a GPU that run on the CPU, but conceptually one can
+think of these implementations as being a type of GPU in this context, since they have
+similar constraints when it comes to precision, etc.)
 
 Compile time evaluation is execution that occurs when setting up a shader
 module, i.e. when compiling WGSL to a platform specific shading language. It is
@@ -588,18 +588,18 @@ let c: f32 = a + b
 and
 ```
 // compile time
-const c: f32 = 1 + 2
+const c: f32 = 1.0f + 2.0f
 ```
-should produce the same result of `3` in the variable `c`, assuming `1` and `2`
-were passed in as `a` & `b`.
+should produce the same result of `3.0` in the variable `c`, assuming `1.0` and `2.0`
+were passed in as `a` and `b`.
 
 The only difference, is when/where the execution occurs.
 
 The difference in behaviour between these two occur when the result of the
 operation is not finite for the underlying floating point type.
 
-If instead of `1` and `2`, we had `10` and `f32.max`, so the true result is
-`f32.max + 10`, the user will see a difference. Specifically the runtime
+If instead of `1.0` and `2.0`, we had `10.0` and `f32.max`, so the true result is
+`f32.max + 10.0`, the behaviours differ. Specifically the runtime
 evaluated version will still run, but the result in `c` will be an indeterminate
 value, which is any finite f32 value. For the compile time example instead,
 compiling the shader will fail validation.
@@ -611,7 +611,7 @@ execution.
 
 Unfortunately we are dealing with intervals of results and not precise results.
 So this leads to more even conceptual complexity. For runtime evaluation, this
-isn't too bad, because the rule becomes if any part of the interval is
+isn't too bad, because the rule becomes: if any part of the interval is
 non-finite then an indeterminate value can be a result, and the interval for an
 indeterminate result `[fp min, fp max]`, will include any finite portions of the
 interval.

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -45,7 +45,7 @@
     "@types/pngjs": "^6.0.1",
     "@types/serve-index": "^1.9.1",
     "@typescript-eslint/parser": "^4.33.0",
-    "@webgpu/types": "gpuweb/types#ca1a548178567e6021fd194380b97be1bf6b07b7",
+    "@webgpu/types": "^0.1.38",
     "ansi-colors": "4.1.1",
     "babel-plugin-add-header-comment": "^1.0.3",
     "babel-plugin-const-enum": "^1.2.0",