From 759c3262adc0443335b811b69cd2e6f56b35d5ee Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Mon, 21 Oct 2024 02:02:10 +0200 Subject: [PATCH 1/2] Optimise GL fence checking by querying less (#6427) --- CHANGELOG.md | 1 + wgpu-hal/src/gles/device.rs | 21 ++++++++++++++------- wgpu-hal/src/gles/mod.rs | 18 ++++++++++++++---- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fdb858e190..ef1b78751c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -142,6 +142,7 @@ By @bradwerth [#6216](https://github.com/gfx-rs/wgpu/pull/6216). - Fix GL debug message callbacks not being properly cleaned up (causing UB). By @Imberflur in [#6114](https://github.com/gfx-rs/wgpu/pull/6114) - Fix calling `slice::from_raw_parts` with unaligned pointers in push constant handling. By @Imberflur in [#6341](https://github.com/gfx-rs/wgpu/pull/6341) +- Optimise fence checking when `Queue::submit` is called many times per frame. By @dinnerbone in [#6427](https://github.com/gfx-rs/wgpu/pull/6427) #### WebGPU diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 15292d95c5..0421adc047 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -8,6 +8,7 @@ use std::{ sync::{Arc, Mutex}, }; +use crate::AtomicFenceValue; use arrayvec::ArrayVec; use std::sync::atomic::Ordering; @@ -1534,7 +1535,7 @@ impl crate::Device for super::Device { unsafe fn create_fence(&self) -> Result { self.counters.fences.add(1); Ok(super::Fence { - last_completed: 0, + last_completed: AtomicFenceValue::new(0), pending: Vec::new(), }) } @@ -1560,7 +1561,7 @@ impl crate::Device for super::Device { wait_value: crate::FenceValue, timeout_ms: u32, ) -> Result { - if fence.last_completed < wait_value { + if fence.last_completed.load(Ordering::Relaxed) < wait_value { let gl = &self.shared.context.lock(); let timeout_ns = if cfg!(any(webgl, Emscripten)) { 0 @@ -1572,19 +1573,25 @@ impl crate::Device for super::Device { .iter() .find(|&&(value, _)| value >= wait_value) { - return match unsafe { + let signalled = match unsafe { gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32) } { // for some reason firefox returns WAIT_FAILED, to investigate #[cfg(any(webgl, Emscripten))] glow::WAIT_FAILED => { log::warn!("wait failed!"); - Ok(false) + false } - glow::TIMEOUT_EXPIRED => Ok(false), - glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => Ok(true), - _ => Err(crate::DeviceError::Lost), + glow::TIMEOUT_EXPIRED => false, + glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => true, + _ => return Err(crate::DeviceError::Lost), }; + if signalled { + fence + .last_completed + .fetch_max(wait_value, Ordering::Relaxed); + } + return Ok(signalled); } } Ok(true) diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 8eb2800895..55f8537155 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -120,7 +120,7 @@ use glow::HasContext; use naga::FastHashMap; use parking_lot::Mutex; -use std::sync::atomic::{AtomicU32, AtomicU8}; +use std::sync::atomic::{AtomicU32, AtomicU8, Ordering}; use std::{fmt, ops::Range, sync::Arc}; #[derive(Clone, Debug)] @@ -718,7 +718,7 @@ impl crate::DynQuerySet for QuerySet {} #[derive(Debug)] pub struct Fence { - last_completed: crate::FenceValue, + last_completed: crate::AtomicFenceValue, pending: Vec<(crate::FenceValue, glow::Fence)>, } @@ -743,13 +743,24 @@ unsafe impl Sync for Fence {} impl Fence { fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue { - let mut max_value = self.last_completed; + let mut max_value = self.last_completed.load(Ordering::Relaxed); for &(value, sync) in self.pending.iter() { + if value <= max_value { + // We already know this was good, no need to check again + continue; + } let status = unsafe { gl.get_sync_status(sync) }; if status == glow::SIGNALED { max_value = value; + } else { + // Anything after the first unsignalled is guaranteed to also be unsignalled + break; } } + + // Track the latest value, to save ourselves some querying later + self.last_completed.fetch_max(max_value, Ordering::Relaxed); + max_value } @@ -763,7 +774,6 @@ impl Fence { } } self.pending.retain(|&(value, _)| value > latest); - self.last_completed = latest; } } From e06f10e0a4a02ea4ff9c2769082f057fbd69ae30 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 00:23:43 +0000 Subject: [PATCH 2/2] build(deps): bump crate-ci/typos from 1.24.6 to 1.26.0 (#6399) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Connor Fitzgerald --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 2 +- naga/CHANGELOG.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb7acad435..23b55eb6bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -629,7 +629,7 @@ jobs: run: taplo format --check --diff - name: Check for typos - uses: crate-ci/typos@v1.24.6 + uses: crate-ci/typos@v1.26.0 check-cts-runner: # runtime is normally 2 minutes diff --git a/CHANGELOG.md b/CHANGELOG.md index ef1b78751c..db56245c1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -949,7 +949,7 @@ By @cwfitzgerald in [#5053](https://github.com/gfx-rs/wgpu/pull/5053) #### Naga -- Naga's WGSL front end now allows operators to produce values with abstract types, rather than concretizing thir operands. By @jimblandy in [#4850](https://github.com/gfx-rs/wgpu/pull/4850) and [#4870](https://github.com/gfx-rs/wgpu/pull/4870). +- Naga's WGSL front end now allows operators to produce values with abstract types, rather than concretizing their operands. By @jimblandy in [#4850](https://github.com/gfx-rs/wgpu/pull/4850) and [#4870](https://github.com/gfx-rs/wgpu/pull/4870). - Naga's WGSL front and back ends now have experimental support for 64-bit floating-point literals: `1.0lf` denotes an `f64` value. There has been experimental support for an `f64` type for a while, but until now there was no syntax for writing literals with that type. As before, Naga module validation rejects `f64` values unless `naga::valid::Capabilities::FLOAT64` is requested. By @jimblandy in [#4747](https://github.com/gfx-rs/wgpu/pull/4747). - Naga constant evaluation can now process binary operators whose operands are both vectors. By @jimblandy in [#4861](https://github.com/gfx-rs/wgpu/pull/4861). - Add `--bulk-validate` option to Naga CLI. By @jimblandy in [#4871](https://github.com/gfx-rs/wgpu/pull/4871). diff --git a/naga/CHANGELOG.md b/naga/CHANGELOG.md index 49cde4e212..1f3818fc29 100644 --- a/naga/CHANGELOG.md +++ b/naga/CHANGELOG.md @@ -689,7 +689,7 @@ MSL-OUT API -- Make `WithSpan` clonable ([#1620](https://github.com/gfx-rs/naga/pull/1620)) **@jakobhellermann** +- Make `WithSpan` cloneable ([#1620](https://github.com/gfx-rs/naga/pull/1620)) **@jakobhellermann** MSL-OUT