diff --git a/crates/encoding/src/config.rs b/crates/encoding/src/config.rs index 8b44bb9b..f8739976 100644 --- a/crates/encoding/src/config.rs +++ b/crates/encoding/src/config.rs @@ -390,11 +390,11 @@ impl BufferSizes { // The following buffer sizes have been hand picked to accommodate the vello test scenes as // well as paris-30k. These should instead get derived from the scene layout using // reasonable heuristics. - let bin_data = BufferSize::new(1 << 18); - let tiles = BufferSize::new(1 << 21); - let lines = BufferSize::new(1 << 21); - let seg_counts = BufferSize::new(1 << 21); - let segments = BufferSize::new(1 << 21); + let bin_data = BufferSize::new(1 << 21); + let tiles = BufferSize::new(1 << 22); + let lines = BufferSize::new(1 << 22); + let seg_counts = BufferSize::new(1 << 22); + let segments = BufferSize::new(1 << 22); let ptcl = BufferSize::new(1 << 23); Self { path_reduced, diff --git a/examples/scenes/src/mmark.rs b/examples/scenes/src/mmark.rs index 3deea8d7..ac4924a3 100644 --- a/examples/scenes/src/mmark.rs +++ b/examples/scenes/src/mmark.rs @@ -105,7 +105,7 @@ impl TestScene for MMark { element.is_split ^= true; } } - let label = format!("mmark test: {} path elements (up/down to adjust)", n); + let label = format!("mmark test: complexity: {}, {} path elements (up/down to adjust)", c, n); params.text.add( scene, None, diff --git a/examples/scenes/src/test_scenes.rs b/examples/scenes/src/test_scenes.rs index e03792a1..b4dec9ad 100644 --- a/examples/scenes/src/test_scenes.rs +++ b/examples/scenes/src/test_scenes.rs @@ -597,7 +597,7 @@ fn longpathdash(cap: Cap) -> impl FnMut(&mut Scene, &mut SceneParams) { .with_caps(cap) .with_join(Join::Bevel) .with_dashes(0.0, [1.0, 1.0]), - Affine::translate((50.0, 50.0)), + Affine::scale(2.5) * Affine::translate((50.0, 50.0)), Color::YELLOW, None, &path, diff --git a/examples/with_winit/src/lib.rs b/examples/with_winit/src/lib.rs index 8346274a..fa23ee2e 100644 --- a/examples/with_winit/src/lib.rs +++ 
// Number of Newton refinement steps applied to the initial cube-root guess.
const NEWTON_ITER = 1;
// Number of Halley refinement steps applied after the Newton steps.
const HALLEY_ITER = 1;

// Approximates the real cube root of `x`, preserving its sign.
//
// The initial guess is produced with integer arithmetic on the bit pattern of
// |x|: the bits are divided by three and a constant is added before the result
// is reinterpreted as a float (presumably the constant re-centers the exponent
// bias; it is taken over unchanged). The guess is then refined with
// NEWTON_ITER Newton steps followed by HALLEY_ITER Halley steps for y^3 = x.
//
// Returns 0 for an input of exactly 0.
fn cbrt(x: f32) -> f32 {
    // sign(0) is 0, which forces the initial guess to 0; the Newton step would
    // then evaluate 0 / (0 * 0) and the Halley step 0 / 0. Handle zero up front
    // so callers that convert the result to an integer get a defined value.
    if x == 0. {
        return 0.;
    }
    var y = sign(x) * bitcast<f32>(bitcast<u32>(abs(x)) / 3u + 0x2a514067u);
    for (var i = 0; i < NEWTON_ITER; i++) {
        // Newton's method for y^3 - x = 0: y <- (2y + x / y^2) / 3.
        y = (2. * y + x / (y * y)) * .333333333;
    }
    for (var i = 0; i < HALLEY_ITER; i++) {
        // Halley's method for y^3 - x = 0: y <- y * (y^3 + 2x) / (2y^3 + x).
        let y3 = y * y * y;
        y *= (y3 + 2. * x) / (2. * y3 + x);
    }
    return y;
}
Use ifdef to lower directly to lines. +#ifdef arcs + let arclen = length(es.p0 - es.p1) / es.params.ch; + let est_err = (1. / 120.) / tol * abs(k1) * (arclen + 0.4 * abs(k1 * offset)); + let n_subdiv = cbrt(est_err); + let n = max(u32(ceil(n_subdiv)), 1u); + let arc_dt = 1. / f32(n); + for (var i = 0u; i < n; i++) { + var ap1: vec2f; + let arc_t0 = f32(i) * arc_dt; + let arc_t1 = arc_t0 + arc_dt; + if i + 1u == n && t1 == 1. { + ap1 = t_end; + } else { + ap1 = es_seg_eval_with_offset(es, arc_t1, normalized_offset); + } + let t = arc_t0 + 0.5 * arc_dt - 0.5; + let k = es.params.k0 + t * k1; + let arclen_offset = arclen + normalized_offset * k; + //let r = sign(offset) * arclen_offset / k; + var r: f32; + let arc_k = k * arc_dt; + if abs(arc_k) < 1e-12 { + r = 0.; + } else { + let s = select(sign(offset), 1., offset == 0.); + r = s * 0.5 * length(ap1 - lp0) / sin(0.5 * arc_k); + } + let l0 = select(ap1, lp0, offset >= 0.); + let l1 = select(lp0, ap1, offset >= 0.); + if abs(r) < 1e-12 { + output_line_with_transform(path_ix, l0, l1, transform); + } else { + let angle = asin(0.5 * length(l1 - l0) / r); + let mid_ch = 0.5 * (l0 + l1); + let v = normalize(l1 - mid_ch) * cos(angle) * r; + let center = mid_ch - vec2(-v.y, v.x); + flatten_arc(path_ix, l0, l1, center, 2. * angle, transform); + } + lp0 = ap1; + } +#else let scale_multiplier = sqrt(0.125 * scale * cubic_params.chord_len / (es.params.ch * tol)); var a = 0.0; var b = 0.0; @@ -459,6 +516,7 @@ fn flatten_euler( output_line_with_transform(path_ix, l0, l1, transform); lp0 = lp1; } +#endif // lines last_p = this_pq1.point; last_q = this_pq1.deriv; last_t = t1; @@ -487,20 +545,25 @@ fn flatten_euler( fn flatten_arc( path_ix: u32, begin: vec2f, end: vec2f, center: vec2f, angle: f32, transform: Transform ) { +// NOTE: change this to "ifndef" to just render the arc chords. 
+#ifndef ablate_arc_flattening + output_line_with_transform(path_ix, begin, end, transform); +#else var p0 = transform_apply(transform, begin); var r = begin - center; let MIN_THETA = 0.0001; let tol = 0.25; let radius = max(tol, length(p0 - transform_apply(transform, center))); - let theta = max(MIN_THETA, 2. * acos(1. - tol / radius)); + var theta = max(MIN_THETA, 2. * acos(1. - tol / radius)); // Always output at least one line so that we always draw the chord. - let n_lines = max(1u, u32(ceil(angle / theta))); + let n_lines = max(1u, u32(ceil(abs(angle) / theta))); + theta = abs(angle) / f32(n_lines); let c = cos(theta); let s = sin(theta); - let rot = mat2x2(c, -s, s, c); + let rot = mat2x2(c, sign(angle) * -s, sign(angle) * s, c); let line_ix = atomicAdd(&bump.lines, n_lines); for (var i = 0u; i < n_lines - 1u; i += 1u) { @@ -511,6 +574,7 @@ fn flatten_arc( } let p1 = transform_apply(transform, end); write_line(line_ix + n_lines - 1u, path_ix, p0, p1); +#endif } fn draw_cap( @@ -552,10 +616,19 @@ fn draw_join( let cr = tan_prev.x * tan_next.y - tan_prev.y * tan_next.x; let d = dot(tan_prev, tan_next); +#ifdef inner_join + let is_backside = cr > 0.; +#endif switch style_flags & STYLE_FLAGS_JOIN_MASK { case STYLE_FLAGS_JOIN_BEVEL: { +#ifdef inner_join + let p0 = select(front0, back0, is_backside); + let p1 = select(front1, back1, is_backside); + output_line_with_transform(path_ix, p0, p1, transform); +#else output_two_lines_with_transform(path_ix, front0, front1, back0, back1, transform); +#endif } case STYLE_FLAGS_JOIN_MITER: { let hypot = length(vec2f(cr, d)); @@ -563,7 +636,9 @@ fn draw_join( var line_ix: u32; if 2. * hypot < (hypot + d) * miter_limit * miter_limit && cr != 0. 
{ +#ifndef inner_join let is_backside = cr > 0.; +#endif let fp_last = select(front0, back1, is_backside); let fp_this = select(front1, back0, is_backside); let p = select(front0, back0, is_backside); @@ -572,7 +647,11 @@ fn draw_join( let h = (tan_prev.x * v.y - tan_prev.y * v.x) / cr; let miter_pt = fp_this - tan_next * h; +#ifdef inner_join + line_ix = atomicAdd(&bump.lines, 2u); +#else line_ix = atomicAdd(&bump.lines, 3u); +#endif write_line_with_transform(line_ix, path_ix, p, miter_pt, transform); line_ix += 1u; @@ -582,10 +661,22 @@ fn draw_join( front0 = miter_pt; } } else { +#ifdef inner_join + line_ix = atomicAdd(&bump.lines, 1u); +#else line_ix = atomicAdd(&bump.lines, 2u); +#endif + } +#ifdef inner_join + if is_backside { + write_line_with_transform(line_ix, path_ix, back0, back1, transform); + } else { + write_line_with_transform(line_ix, path_ix, front0, front1, transform); } +#else write_line_with_transform(line_ix, path_ix, front0, front1, transform); write_line_with_transform(line_ix + 1u, path_ix, back0, back1, transform); +#endif } case STYLE_FLAGS_JOIN_ROUND: { var arc0: vec2f; @@ -604,10 +695,32 @@ fn draw_join( other1 = back1; } flatten_arc(path_ix, arc0, arc1, p0, abs(atan2(cr, d)), transform); +#ifndef inner_join output_line_with_transform(path_ix, other0, other1, transform); +#endif } default: {} } +#ifdef inner_join + // Handle inner join + if abs(cr) < 1e-6 { + // smooth join, don't need to draw inner join + let inner0 = select(back0, front0, is_backside); + let inner1 = select(back1, front1, is_backside); + if any(inner0 != inner1) { + output_line_with_transform(path_ix, inner0, inner1, transform); + } + } else { + let inner0 = select(back0, front0, is_backside); + let inner1 = select(back1, front1, is_backside); + let line_ix = atomicAdd(&bump.lines, 4u); + write_line_with_transform(line_ix, path_ix, inner0, p0, transform); + write_line_with_transform(line_ix + 1, path_ix, p0, inner1, transform); + write_line_with_transform(line_ix + 2, 
path_ix, inner0, p0, transform); + write_line_with_transform(line_ix + 3, path_ix, p0, inner1, transform); + flatten_arc(path_ix, inner1, inner0, p0, -abs(atan2(cr, d)), transform); + } +#endif } fn read_f32_point(ix: u32) -> vec2f { diff --git a/src/lib.rs b/src/lib.rs index ed56b5ff..dcdc7314 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -300,6 +300,14 @@ impl Renderer { #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; + #[cfg(feature = "wgpu-profiler")] + { + let mut encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + self.profiler.resolve_queries(&mut encoder); + queue.submit(Some(encoder.finish())); + self.profiler.end_frame().unwrap(); + } Ok(()) } @@ -426,7 +434,7 @@ impl Renderer { let mut render = Render::new(); let encoding = scene.encoding(); // TODO: turn this on; the download feature interacts with CPU dispatch - let robust = false; + let robust = true; let recording = render.render_encoding_coarse(encoding, &self.shaders, params, robust); let target = render.out_image(); let bump_buf = render.bump_buf(); diff --git a/src/scene.rs b/src/scene.rs index c93d68a9..c272d3c1 100644 --- a/src/scene.rs +++ b/src/scene.rs @@ -29,6 +29,10 @@ impl Scene { Self::default() } + pub fn print_path_counts(&self) { + println!("scene n_paths: {}, n_path_segments: {}", self.encoding.n_paths, self.encoding.n_path_segments); + } + /// Removes all content from the scene. 
pub fn reset(&mut self) { self.encoding.reset(); diff --git a/src/shaders.rs b/src/shaders.rs index e00c1a96..d5df41f8 100644 --- a/src/shaders.rs +++ b/src/shaders.rs @@ -95,6 +95,8 @@ pub fn full_shaders( let mut small_config = HashSet::new(); small_config.insert("full".into()); small_config.insert("small".into()); + let mut flatten_config = HashSet::new(); + flatten_config.insert("inner_join".into()); let mut force_gpu = false; @@ -166,7 +168,8 @@ pub fn full_shaders( let bbox_clear = add_shader!(bbox_clear, [Uniform, Buffer], &empty); let flatten = add_shader!( flatten, - [Uniform, BufReadOnly, BufReadOnly, Buffer, Buffer, Buffer] + [Uniform, BufReadOnly, BufReadOnly, Buffer, Buffer, Buffer], + &flatten_config ); let draw_reduce = add_shader!(draw_reduce, [Uniform, BufReadOnly, Buffer], &empty); let draw_leaf = add_shader!( diff --git a/src/util.rs b/src/util.rs index ae97aab1..130e2db7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -138,6 +138,7 @@ impl RenderContext { ) .await .ok()?; + println!("{:?}", adapter.get_info()); let device_handle = DeviceHandle { adapter, device, diff --git a/src/wgpu_engine.rs b/src/wgpu_engine.rs index 3d464987..1224c7da 100644 --- a/src/wgpu_engine.rs +++ b/src/wgpu_engine.rs @@ -350,8 +350,8 @@ impl WgpuEngine { let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { label: Some(label) }); - #[cfg(feature = "wgpu-profiler")] - let query = profiler.begin_query(label, &mut encoder, device); + //#[cfg(feature = "wgpu-profiler")] + //let query = profiler.begin_query(label, &mut encoder, device); for command in &recording.commands { match command { Command::Upload(buf_proxy, bytes) => { @@ -485,16 +485,17 @@ impl WgpuEngine { &wgpu_shader.bind_group_layout, bindings, )?; - let mut cpass = encoder.begin_compute_pass(&Default::default()); #[cfg(feature = "wgpu-profiler")] let query = profiler - .begin_query(shader.label, &mut cpass, device) - .with_parent(Some(&query)); + .begin_query(shader.label, &mut encoder, 
device); + let mut cpass = encoder.begin_compute_pass(&Default::default()); + //.with_parent(Some(&query)); cpass.set_pipeline(&wgpu_shader.pipeline); cpass.set_bind_group(0, &bind_group, &[]); cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2); + drop(cpass); #[cfg(feature = "wgpu-profiler")] - profiler.end_query(&mut cpass, query); + profiler.end_query(&mut encoder, query); } } } @@ -532,11 +533,13 @@ impl WgpuEngine { queue, proxy, ); - let mut cpass = encoder.begin_compute_pass(&Default::default()); - #[cfg(feature = "wgpu-profiler")] let query = profiler - .begin_query(shader.label, &mut cpass, device) - .with_parent(Some(&query)); + .begin_query(shader.label, &mut encoder, device); + let mut cpass = encoder.begin_compute_pass(&Default::default()); + //#[cfg(feature = "wgpu-profiler")] + // let query = profiler + // .begin_query(shader.label, &mut cpass, device) + // .with_parent(Some(&query)); cpass.set_pipeline(&wgpu_shader.pipeline); cpass.set_bind_group(0, &bind_group, &[]); let buf = self @@ -544,8 +547,9 @@ impl WgpuEngine { .get_gpu_buf(proxy.id) .ok_or("buffer for indirect dispatch not in map")?; cpass.dispatch_workgroups_indirect(buf, *offset); - #[cfg(feature = "wgpu-profiler")] - profiler.end_query(&mut cpass, query); + drop(cpass); + // #[cfg(feature = "wgpu-profiler")] + profiler.end_query(&mut encoder, query); } } } @@ -583,8 +587,8 @@ impl WgpuEngine { } } } - #[cfg(feature = "wgpu-profiler")] - profiler.end_query(&mut encoder, query); + // #[cfg(feature = "wgpu-profiler")] +// profiler.end_query(&mut encoder, query); queue.submit(Some(encoder.finish())); for id in free_bufs { if let Some(buf) = self.bind_map.buf_map.remove(&id) {