From b99b3a5fee421198a43938c9e04252f30cdc1f39 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Thu, 11 Jun 2026 11:23:33 -0400 Subject: [PATCH 1/2] Make output buffer copying optional --- demos/cli/src/main.rs | 7 +- fidget-wgpu/src/lib.rs | 15 +-- fidget-wgpu/src/voxel.rs | 214 +++++++++++++++++++++++------------ fidget/tests/voxel_render.rs | 12 +- 4 files changed, 164 insertions(+), 84 deletions(-) diff --git a/demos/cli/src/main.rs b/demos/cli/src/main.rs index 0f90494f..fc64c33d 100644 --- a/demos/cli/src/main.rs +++ b/demos/cli/src/main.rs @@ -312,12 +312,13 @@ fn run3d_wgpu( let cfg = fidget::wgpu::voxel::RenderConfig { world_to_model }; let mut image = Default::default(); let start = std::time::Instant::now(); - let mut buffers = ctx.buffers(image_size)?; + let buffers = ctx.buffers(image_size)?; + let mut out = ctx.image_buffer(&buffers); let shape = ctx.shape(&shape)?; let mut compute_pass_time = std::time::Duration::ZERO; for _ in 0..settings.n { - ctx.submit(&shape, &buffers, &cfg)?; - let img = ctx.map_image(&mut buffers); + ctx.submit(&shape, &buffers, Some(&mut out), &cfg)?; + let img = ctx.map_image(&mut out); compute_pass_time += img.time().unwrap(); image = img.image(); } diff --git a/fidget-wgpu/src/lib.rs b/fidget-wgpu/src/lib.rs index 976e0d3f..11c41212 100644 --- a/fidget-wgpu/src/lib.rs +++ b/fidget-wgpu/src/lib.rs @@ -114,16 +114,16 @@ impl BufferItemCount for ImageSize { } } -impl GenericFlexBuffer { +impl GenericFlexBuffer { fn new( device: &wgpu::Device, name: String, item_count: B, ) -> Result { + Self::check_size(item_count)?; let item_count = item_count.item_count(); let size = Self::calculate_buffer_size(item_count); let usage = wgpu::BufferUsages::from_bits(U).unwrap(); - Self::check_size(usage, size)?; let data = device.create_buffer(&wgpu::BufferDescriptor { label: Some(name.as_str()), size, @@ -155,10 +155,11 @@ impl GenericFlexBuffer { Self::calculate_buffer_size(self.item_count) } - fn check_size( - usage: wgpu::BufferUsages, - size: u64, - ) -> Result<(), BufferSizeError> { + fn check_size(item_count: B) -> Result<(), BufferSizeError> { + let item_count = item_count.item_count(); + let size = Self::calculate_buffer_size(item_count); + let usage = wgpu::BufferUsages::from_bits(U).unwrap(); + let buf_ty = if usage.contains(wgpu::BufferUsages::STORAGE) { BufferType::Storage } else if usage.contains(wgpu::BufferUsages::UNIFORM) { @@ -180,11 +181,11 @@ impl GenericFlexBuffer { device: &wgpu::Device, item_count: B, ) -> Result<(), BufferSizeError> { + Self::check_size(item_count)?; let item_count = item_count.item_count(); if item_count > self.item_capacity() { let size = Self::calculate_buffer_size(item_count); let usage = self.data.usage(); - Self::check_size(usage, size)?; self.data = device.create_buffer(&wgpu::BufferDescriptor { label: Some(self.name.as_str()), size, diff --git a/fidget-wgpu/src/voxel.rs b/fidget-wgpu/src/voxel.rs index 606e810a..424d674b 100644 --- a/fidget-wgpu/src/voxel.rs +++ b/fidget-wgpu/src/voxel.rs @@ -70,6 +70,7 @@ //! - Build a [`Context`] //! - Use [`Context::shape`] to convert from a [`VmShape`] to a [`RenderShape`] //! - Use [`Context::buffers`] to get [`Buffers`] at a particular image size +//! - Use [`Context::image_buffer`] to get an [`ImageReadBuffer`] //! - Call [`Context::run`] or [`Context::run_async`] to get an image //! //! ## Sync and async operation @@ -82,12 +83,24 @@ //! These functions are feature-flagged and available depending on compile //! target (native versus WebAssembly). //! +//! ## Low-level building blocks +//! +//! [`Context::run`] and `run_async` do four things: +//! +//! - Run the GPU kernels to produce an output image, which is a +//! [`GeometryPixel`] array in a GPU storage buffer +//! - Copy from that GPU storage buffer to a mappable buffer (for read-back) +//! - Map that buffer into a [`MappedImage`] +//! - Read image data back to the CPU +//! //! Lower-level building blocks are also available: [`Context::submit`] submits //! the render operations to the GPU, and [`Context::map_image`] / //! [`Context::map_image_async`] map the image buffer back to the GPU. //! -//! To reuse the image buffer within a more complex GPU pipeline (without -//! copying to the host), see [`Buffers::image_storage_buffer`]. +//! To reuse the image buffer within a more complex GPU pipeline – without +//! copying to the mappable buffer or CPU – [`Context::submit`] may be called +//! with `None` for its `out` argument. In this case, the output is available +//! in [`Buffers::image_storage_buffer`] for subsequent pipelines. use crate::{ ArrayBuffer, BufferItemCount, BufferSizeError, BufferType, ImageBuffer, @@ -1443,6 +1456,9 @@ impl RenderShape { /// /// This object is constructed by [`Context::buffers`] and may only be used with /// that particular [`Context`]. +/// +/// A successfully constructed `Buffers` object also guarantees infallible +/// construction of an [`ImageReadBuffer`] object of the same size. pub struct Buffers { /// Image render size /// @@ -1480,25 +1496,35 @@ pub struct Buffers { /// Buffer of [`GeometryPixel`] data, generated by the normal pass geom: ImageBuffer, - /// Result buffer that can be read back from the host - /// - /// This is mostly image pixels (as [`GeometryPixel`] values), but also - /// contains two trailing `u64` values for timestamps. - image: ArrayBuffer, - /// Query set for timestamps /// /// This must be present if and only if the parent context has timestamps /// enabled (per [`Context::has_timestamps`]) timestamps: Option, - /// Buffer into which we resolve the query + /// Buffer into which we resolve the timestamp query ts_buf: wgpu::Buffer, /// Cached bind groups bind_groups: BindGroups, } +/// Buffer for reading data back from the GPU +/// +/// Once mapped, this is wrapped by a [`MappedImage`] +pub struct ImageReadBuffer { + /// Image render size + image_size: VoxelSize, + + /// Result buffer that can be read back from the CPU + /// + /// This is mostly image pixels (as [`GeometryPixel`] values), but also + /// contains two trailing `u64` values for timestamps. + buffer: ImageReadArrayBuffer, +} + +type ImageReadArrayBuffer = ArrayBuffer; + /// Cached bind groups (constructed on-demand) #[derive(Default)] struct BindGroups { @@ -1805,7 +1831,7 @@ impl Buffers { /// Returns the image storage buffer and its valid size (in bytes) /// /// This is intended for subsequent shaders which want to use the - /// [`GeometryPixel`] image data without copying to the host. + /// [`GeometryPixel`] image data without copying to the CPU. /// /// The buffer is configured with `STORAGE | COPY_SRC | COPY_DST`; note /// that it is not valid for `MAP_READ` operations. To map the @@ -1834,6 +1860,16 @@ impl Buffers { as u64 <= BufferType::Storage.max_size() ); + + // Check that we can build an `ImageReadBuffer` of the appropriate + // size (even though they are stored separately) + ImageReadArrayBuffer::check_size(Self::image_buf_size(image_size)) + .map_err(|err| BuffersError { + requested: image_size, + buf: BufferName::Image, + err, + })?; + let config_buf = device.create_buffer(&wgpu::BufferDescriptor { label: Some("config"), size: (std::mem::size_of::() @@ -1876,17 +1912,6 @@ impl Buffers { err, })?; - let image = ArrayBuffer::new( - device, - "image".to_string(), - Self::image_buf_size(image_size), - ) - .map_err(|err| BuffersError { - requested: image_size, - buf: BufferName::Image, - err, - })?; - let ts_buf = device.create_buffer(&wgpu::BufferDescriptor { label: Some("ts"), size: 2 * std::mem::size_of::() as u64, @@ -1949,7 +1974,6 @@ impl Buffers { tile4, voxels, geom, - image, timestamps, z_hist_buf, ts_buf, @@ -2028,6 +2052,10 @@ impl Buffers { /// Resizes to render the target image size /// /// Internal buffers are resized to fit (only getting larger) + /// + /// This function also checks that the size is appropriate for an + /// [`ImageReadBuffer`] (though we do not store such an object), so that + /// later functions can resize it infallibly. fn set_image_size( &mut self, device: &wgpu::Device, @@ -2044,7 +2072,6 @@ impl Buffers { tile4, voxels, geom, - image, timestamps: _, ts_buf: _, bind_groups, @@ -2096,8 +2123,10 @@ impl Buffers { buf: BufferName::Geom, err, })?; - image - .grow_to_fit(device, Self::image_buf_size(image_size)) + + // Check that we can build an `ImageReadBuffer` of the appropriate + // size (even though they are stored separately) + ImageReadArrayBuffer::check_size(Self::image_buf_size(image_size)) .map_err(|err| BuffersError { requested: image_size, buf: BufferName::Image, @@ -2120,7 +2149,6 @@ impl Buffers { tile4, voxels, geom, - image, timestamps: _, ts_buf, bind_groups: _, @@ -2133,7 +2161,6 @@ impl Buffers { + tile4.capacity() + voxels.capacity() + geom.capacity() - + image.capacity() + ts_buf.size() } @@ -2150,7 +2177,6 @@ impl Buffers { tile4, voxels, geom, - image, timestamps: _, ts_buf, bind_groups: _, @@ -2163,7 +2189,6 @@ impl Buffers { + tile4.size() + voxels.size() + geom.size() - + image.size() + ts_buf.size() } } @@ -2267,6 +2292,23 @@ impl Context { Buffers::new(&self.device, image_size, self.has_timestamps) } + /// Returns an [`ImageReadBuffer`], sized to read from a [`Buffers`] object + /// + /// This is infallible because the [`Buffers`] constructor also ensures that + /// the image size is appropriate for the image read buffer (even though + /// it's constructed separately). + pub fn image_buffer(&self, buffers: &Buffers) -> ImageReadBuffer { + ImageReadBuffer { + image_size: buffers.image_size, + buffer: ArrayBuffer::new( + &self.device, + "image".to_owned(), + Buffers::image_buf_size(buffers.image_size), + ) + .unwrap(), + } + } + /// Builds a new [`RenderShape`] object for the given shape pub fn shape( &self, @@ -2282,10 +2324,11 @@ impl Context { pub fn run( &self, shape: &RenderShape, - buffers: &mut Buffers, + buffers: &Buffers, + out: &mut ImageReadBuffer, settings: RenderConfig, ) -> Result { - self.run_with_vars(shape, &Default::default(), buffers, settings) + self.run_with_vars(shape, &Default::default(), buffers, out, settings) } /// Renders the image, with a blocking wait to read pixel data from the GPU @@ -2296,11 +2339,12 @@ impl Context { &self, shape: &RenderShape, vars: &ShapeVars, - buffers: &mut Buffers, + buffers: &Buffers, + out: &mut ImageReadBuffer, settings: RenderConfig, ) -> Result { - self.submit_with_vars(shape, vars, buffers, &settings)?; - let image = self.map_image(buffers); + self.submit_with_vars(shape, vars, buffers, Some(out), &settings)?; + let image = self.map_image(out); Ok(image.image()) } @@ -2311,11 +2355,18 @@ impl Context { pub async fn run_async( &self, shape: &RenderShape, - buffers: &mut Buffers, + buffers: &Buffers, + out: &mut ImageReadBuffer, settings: RenderConfig, ) -> Result { - self.run_with_vars_async(shape, &Default::default(), buffers, settings) - .await + self.run_with_vars_async( + shape, + &Default::default(), + buffers, + out, + settings, + ) + .await } /// Renders the image, with a blocking wait to read pixel data from the GPU @@ -2326,22 +2377,32 @@ impl Context { &self, shape: &RenderShape, vars: &ShapeVars, - buffers: &mut Buffers, + buffers: &Buffers, + out: &mut ImageReadBuffer, settings: RenderConfig, ) -> Result { - self.submit_with_vars(shape, vars, buffers, &settings)?; - let image = self.map_image_async(buffers).await; + self.submit_with_vars(shape, vars, buffers, Some(out), &settings)?; + let image = self.map_image_async(out).await; Ok(image.image()) } - /// Submits a single image to be rendered using GPU acceleration + /// Submits a single image to be rendered on the GPU + /// + /// If `out` is present, then pub fn submit( &self, shape: &RenderShape, buffers: &Buffers, + out: Option<&mut ImageReadBuffer>, settings: &RenderConfig, ) -> Result<(), MissingVar> { - self.submit_with_vars(shape, &Default::default(), buffers, settings) + self.submit_with_vars( + shape, + &Default::default(), + buffers, + out, + settings, + ) } /// Submits a single image to be rendered on the GPU, with extra variables @@ -2350,6 +2411,7 @@ impl Context { shape: &RenderShape, vars: &ShapeVars, buffers: &Buffers, + out: Option<&mut ImageReadBuffer>, settings: &RenderConfig, ) -> Result<(), MissingVar> { let render_size = TileRenderSize::from(buffers.image_size); @@ -2490,26 +2552,35 @@ impl Context { // Resolve the raw GPU ticks into the resolve buffer, then copy them // into the last 16 bytes of the image buffer - if let Some(timestamps) = &buffers.timestamps { - encoder.resolve_query_set(timestamps, 0..2, &buffers.ts_buf, 0); + if let Some(image) = out { + image + .buffer + .grow_to_fit( + &self.device, + Buffers::image_buf_size(buffers.image_size), + ) + .unwrap(); // size is checked by `Buffers` constructor + if let Some(timestamps) = &buffers.timestamps { + encoder.resolve_query_set(timestamps, 0..2, &buffers.ts_buf, 0); + encoder.copy_buffer_to_buffer( + &buffers.ts_buf, + 0, + &image.buffer.data, + buffers.geom.size(), // offset past the image data + buffers.ts_buf.size(), + ); + } + + // Copy from the STORAGE | COPY_SRC -> COPY_DST | MAP_READ buffer encoder.copy_buffer_to_buffer( - &buffers.ts_buf, + &buffers.geom.data, + 0, + &image.buffer.data, 0, - &buffers.image.data, - buffers.geom.size(), // offset past the image data - buffers.ts_buf.size(), + buffers.geom.size(), ); } - // Copy from the STORAGE | COPY_SRC -> COPY_DST | MAP_READ buffer - encoder.copy_buffer_to_buffer( - &buffers.geom.data, - 0, - &buffers.image.data, - 0, - buffers.geom.size(), - ); - // Submit the commands and wait for the GPU to complete self.queue.submit(Some(encoder.finish())); Ok(()) @@ -2517,17 +2588,20 @@ impl Context { /// Synchronously maps the image buffer /// - /// The buffers are borrowed exclusively to avoid double-mapping + /// The image is borrowed exclusively to avoid double-mapping /// /// This is a blocking function suitable for use on the desktop #[cfg(not(target_arch = "wasm32"))] - pub fn map_image<'a>(&self, buffers: &'a mut Buffers) -> MappedImage<'a> { - let slice = buffers.image.map_async(|_| {}); + pub fn map_image<'a>( + &self, + image: &'a mut ImageReadBuffer, + ) -> MappedImage<'a> { + let slice = image.buffer.map_async(|_| {}); self.device .poll(wgpu::PollType::wait_indefinitely()) .unwrap(); MappedImage { - buffers, + image, slice, ns_per_tick: if self.has_timestamps { Some(self.queue.get_timestamp_period()) @@ -2539,19 +2613,19 @@ impl Context { /// Asynchronously maps the image buffer /// - /// The buffers are borrowed exclusively to avoid double-mapping + /// The image is borrowed exclusively to avoid double-mapping /// /// This is an `async` function suitable for use in WebAssembly. #[cfg(any(target_arch = "wasm32", doc))] pub async fn map_image_async<'a>( &self, - buffers: &'a mut Buffers, + image: &'a mut ImageReadBuffer, ) -> MappedImage<'a> { let (tx, rx) = flume::bounded(0); - let slice = buffers.image.map_async(move |_| tx.send(()).unwrap()); + let slice = image.buffer.map_async(move |_| tx.send(()).unwrap()); rx.recv_async().await.unwrap(); MappedImage { - buffers, + image, slice, ns_per_tick: if self.has_timestamps { Some(self.queue.get_timestamp_period()) @@ -2609,7 +2683,7 @@ impl Context { /// Handle to a mapped image, which unmaps the image when dropped pub struct MappedImage<'a> { - buffers: &'a Buffers, + image: &'a ImageReadBuffer, slice: wgpu::BufferSlice<'a>, /// Nanoseconds per tick, for resolving timestamps @@ -2618,7 +2692,7 @@ pub struct MappedImage<'a> { impl Drop for MappedImage<'_> { fn drop(&mut self) { - self.buffers.image.data.unmap(); + self.image.buffer.data.unmap(); } } @@ -2631,7 +2705,7 @@ impl MappedImage<'_> { ) .unwrap() .to_owned(); - Image::build(result, self.buffers.image_size).unwrap() + Image::build(result, self.image.image_size).unwrap() } /// Returns the time spent in the compute pass @@ -2652,8 +2726,8 @@ impl MappedImage<'_> { } fn image_bytes(&self) -> usize { - (self.buffers.image_size.width() as usize) - * (self.buffers.image_size.height() as usize) + (self.image.image_size.width() as usize) + * (self.image.image_size.height() as usize) * std::mem::size_of::() } } diff --git a/fidget/tests/voxel_render.rs b/fidget/tests/voxel_render.rs index 6bb1e09d..bc8aa9cc 100644 --- a/fidget/tests/voxel_render.rs +++ b/fidget/tests/voxel_render.rs @@ -127,7 +127,8 @@ mod wgpu { let size = 32; let image_size = RenderSize::from(size); - let mut buf = ctx.buffers(image_size).unwrap(); + let buf = ctx.buffers(image_size).unwrap(); + let mut out = ctx.image_buffer(&buf); for scale in [1.0, 0.5] { for r in [0.5, 0.75] { let sphere = (x.square() + y.square() + z.square()).sqrt() @@ -137,7 +138,8 @@ mod wgpu { let image = ctx .run( &shape, - &mut buf, + &buf, + &mut out, fidget::wgpu::voxel::RenderConfig { world_to_model: View3::from_center_and_scale( Vector3::zeros(), @@ -187,7 +189,8 @@ mod wgpu { let size = 32; let image_size = RenderSize::from(size); - let mut buf = ctx.buffers(image_size).unwrap(); + let buf = ctx.buffers(image_size).unwrap(); + let mut out = ctx.image_buffer(&buf); for scale in [1.0, 0.5] { for r in [0.5, 0.75] { let mut vars = ShapeVars::new(); @@ -196,7 +199,8 @@ mod wgpu { .run_with_vars( &render_shape, &vars, - &mut buf, + &buf, + &mut out, fidget::wgpu::voxel::RenderConfig { world_to_model: View3::from_center_and_scale( Vector3::zeros(), From 37bd5a4be8e99b3618a10aaaee02080317062526 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Sat, 13 Jun 2026 09:05:51 -0400 Subject: [PATCH 2/2] Better docs; fix image.image_size not being updated --- fidget-wgpu/src/voxel.rs | 72 +++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/fidget-wgpu/src/voxel.rs b/fidget-wgpu/src/voxel.rs index 424d674b..99555e4c 100644 --- a/fidget-wgpu/src/voxel.rs +++ b/fidget-wgpu/src/voxel.rs @@ -1511,6 +1511,9 @@ pub struct Buffers { /// Buffer for reading data back from the GPU /// +/// This object is constructed by [`Context::image_buffer`] and may only be used +/// with that particular [`Context`]. +/// /// Once mapped, this is wrapped by a [`MappedImage`] pub struct ImageReadBuffer { /// Image render size @@ -1523,6 +1526,33 @@ pub struct ImageReadBuffer { buffer: ImageReadArrayBuffer, } +impl ImageReadBuffer { + fn new( + device: &wgpu::Device, + name: String, + image_size: VoxelSize, + ) -> Result { + Ok(Self { + image_size, + buffer: ImageReadArrayBuffer::new( + device, + name, + Buffers::image_buf_size(image_size), + )?, + }) + } + + fn grow_to_fit( + &mut self, + device: &wgpu::Device, + image_size: VoxelSize, + ) -> Result<(), BufferSizeError> { + self.image_size = image_size; + self.buffer + .grow_to_fit(device, Buffers::image_buf_size(image_size)) + } +} + type ImageReadArrayBuffer = ArrayBuffer; /// Cached bind groups (constructed on-demand) @@ -1845,9 +1875,7 @@ impl Buffers { pub fn image_storage_buffer(&self) -> (&wgpu::Buffer, u64) { (&self.geom.data, self.geom.size()) } -} -impl Buffers { fn new( device: &wgpu::Device, image_size: VoxelSize, @@ -2298,15 +2326,15 @@ impl Context { /// the image size is appropriate for the image read buffer (even though /// it's constructed separately). pub fn image_buffer(&self, buffers: &Buffers) -> ImageReadBuffer { - ImageReadBuffer { - image_size: buffers.image_size, - buffer: ArrayBuffer::new( - &self.device, - "image".to_owned(), - Buffers::image_buf_size(buffers.image_size), - ) - .unwrap(), - } + ImageReadBuffer::new( + &self.device, + "image".to_owned(), + buffers.image_size, + ) + .expect( + "buffers.image_size should always be \ + a valid size for ImageReadBuffer::new", + ) } /// Builds a new [`RenderShape`] object for the given shape @@ -2388,7 +2416,14 @@ impl Context { /// Submits a single image to be rendered on the GPU /// - /// If `out` is present, then + /// The resulting image (as a buffer of [`GeometryPixel`] data) is available + /// on the GPU in + /// [`buffers.image_storage_buffer()`](Buffers::image_storage_buffer). + /// + /// If `out` is present, then the rendered image is also copied to that + /// [`ImageReadBuffer`] (which may then be mapped for CPU reading by + /// [`map_image`](Self::map_image) or + /// [`map_image_async`](Self::map_image_async)). pub fn submit( &self, shape: &RenderShape, @@ -2406,6 +2441,8 @@ impl Context { } /// Submits a single image to be rendered on the GPU, with extra variables + /// + /// See [`submit`](Self::submit) for additional details. pub fn submit_with_vars( &self, shape: &RenderShape, @@ -2553,13 +2590,10 @@ impl Context { // Resolve the raw GPU ticks into the resolve buffer, then copy them // into the last 16 bytes of the image buffer if let Some(image) = out { - image - .buffer - .grow_to_fit( - &self.device, - Buffers::image_buf_size(buffers.image_size), - ) - .unwrap(); // size is checked by `Buffers` constructor + image.grow_to_fit(&self.device, buffers.image_size).expect( + "buffers.image_size should always be \ + a valid size for ImageReadBuffer::grow_to_fit", + ); if let Some(timestamps) = &buffers.timestamps { encoder.resolve_query_set(timestamps, 0..2, &buffers.ts_buf, 0); encoder.copy_buffer_to_buffer(