From 2bb53ad6e7ea2689f2f56662e5840a8d363b3108 Mon Sep 17 00:00:00 2001 From: Héctor Ramón Jiménez Date: Fri, 29 Mar 2024 04:02:24 +0100 Subject: Use a `StagingBelt` in `iced_wgpu` for regular buffer uploads --- wgpu/src/backend.rs | 26 +++++++++++++++++++++----- wgpu/src/buffer.rs | 14 ++++++++++++-- wgpu/src/image.rs | 36 ++++++++++++++++++++++++------------ wgpu/src/quad.rs | 28 ++++++++++++++++++++-------- wgpu/src/quad/gradient.rs | 5 +++-- wgpu/src/quad/solid.rs | 5 +++-- wgpu/src/text.rs | 2 ++ wgpu/src/triangle.rs | 35 ++++++++++++++++++++++++++--------- wgpu/src/window/compositor.rs | 1 + 9 files changed, 112 insertions(+), 40 deletions(-) (limited to 'wgpu/src') diff --git a/wgpu/src/backend.rs b/wgpu/src/backend.rs index 5019191c..129e9bca 100644 --- a/wgpu/src/backend.rs +++ b/wgpu/src/backend.rs @@ -30,6 +30,7 @@ pub struct Backend { pipeline_storage: pipeline::Storage, #[cfg(any(feature = "image", feature = "svg"))] image_pipeline: image::Pipeline, + staging_belt: wgpu::util::StagingBelt, } impl Backend { @@ -61,6 +62,11 @@ impl Backend { #[cfg(any(feature = "image", feature = "svg"))] image_pipeline, + + // TODO: Resize belt smartly (?) + // It would be great if the `StagingBelt` API exposed methods + // for introspection to detect when a resize may be worth it. + staging_belt: wgpu::util::StagingBelt::new(1024 * 100), } } @@ -105,6 +111,8 @@ impl Backend { &layers, ); + self.staging_belt.finish(); + self.render( device, encoder, @@ -123,12 +131,17 @@ impl Backend { self.image_pipeline.end_frame(); } + /// + pub fn recall(&mut self) { + self.staging_belt.recall(); + } + fn prepare( &mut self, device: &wgpu::Device, queue: &wgpu::Queue, format: wgpu::TextureFormat, - _encoder: &mut wgpu::CommandEncoder, + encoder: &mut wgpu::CommandEncoder, scale_factor: f32, target_size: Size, transformation: Transformation, @@ -144,7 +157,8 @@ impl Backend { if !layer.quads.is_empty() { self.quad_pipeline.prepare( device, - queue, + encoder, + &mut self.staging_belt, &layer.quads, transformation, scale_factor, @@ -157,7 +171,8 @@ impl Backend { self.triangle_pipeline.prepare( device, - queue, + encoder, + &mut self.staging_belt, &layer.meshes, scaled, ); @@ -171,8 +186,8 @@ impl Backend { self.image_pipeline.prepare( device, - queue, - _encoder, + encoder, + &mut self.staging_belt, &layer.images, scaled, scale_factor, @@ -184,6 +199,7 @@ impl Backend { self.text_pipeline.prepare( device, queue, + encoder, &layer.text, layer.bounds, scale_factor, diff --git a/wgpu/src/buffer.rs b/wgpu/src/buffer.rs index ef00c58f..f8828d46 100644 --- a/wgpu/src/buffer.rs +++ b/wgpu/src/buffer.rs @@ -61,12 +61,22 @@ impl Buffer { /// Returns the size of the written bytes. pub fn write( &mut self, - queue: &wgpu::Queue, + device: &wgpu::Device, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, offset: usize, contents: &[T], ) -> usize { let bytes: &[u8] = bytemuck::cast_slice(contents); - queue.write_buffer(&self.raw, offset as u64, bytes); + + belt.write_buffer( + encoder, + &self.raw, + offset as u64, + (bytes.len() as u64).try_into().expect("Non-empty write"), + device, + ) + .copy_from_slice(bytes); self.offsets.push(offset as u64); diff --git a/wgpu/src/image.rs b/wgpu/src/image.rs index 067b77ab..d0bf1182 100644 --- a/wgpu/src/image.rs +++ b/wgpu/src/image.rs @@ -83,21 +83,31 @@ impl Layer { fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, nearest_instances: &[Instance], linear_instances: &[Instance], transformation: Transformation, ) { - queue.write_buffer( + let uniforms = Uniforms { + transform: transformation.into(), + }; + + let bytes = bytemuck::bytes_of(&uniforms); + + belt.write_buffer( + encoder, &self.uniforms, 0, - bytemuck::bytes_of(&Uniforms { - transform: transformation.into(), - }), - ); + (bytes.len() as u64).try_into().expect("Sized uniforms"), + device, + ) + .copy_from_slice(bytes); + + self.nearest + .upload(device, encoder, belt, nearest_instances); - self.nearest.upload(device, queue, nearest_instances); - self.linear.upload(device, queue, linear_instances); + self.linear.upload(device, encoder, belt, linear_instances); } fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) { @@ -158,7 +168,8 @@ impl Data { fn upload( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, instances: &[Instance], ) { self.instance_count = instances.len(); @@ -168,7 +179,7 @@ impl Data { } let _ = self.instances.resize(device, instances.len()); - let _ = self.instances.write(queue, 0, instances); + let _ = self.instances.write(device, encoder, belt, 0, instances); } fn render<'a>(&'a self, render_pass: &mut wgpu::RenderPass<'a>) { @@ -383,8 +394,8 @@ impl Pipeline { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, images: &[layer::Image], transformation: Transformation, _scale: f32, @@ -501,7 +512,8 @@ impl Pipeline { layer.prepare( device, - queue, + encoder, + belt, nearest_instances, linear_instances, transformation, diff --git a/wgpu/src/quad.rs b/wgpu/src/quad.rs index b932f54f..0717a031 100644 --- a/wgpu/src/quad.rs +++ b/wgpu/src/quad.rs @@ -57,7 +57,8 @@ impl Pipeline { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, quads: &Batch, transformation: Transformation, scale: f32, @@ -67,7 +68,7 @@ impl Pipeline { } let layer = &mut self.layers[self.prepare_layer]; - layer.prepare(device, queue, quads, transformation, scale); + layer.prepare(device, encoder, belt, quads, transformation, scale); self.prepare_layer += 1; } @@ -162,7 +163,8 @@ impl Layer { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, quads: &Batch, transformation: Transformation, scale: f32, @@ -171,15 +173,25 @@ impl Layer { let _ = info_span!("Wgpu::Quad", "PREPARE").entered(); let uniforms = Uniforms::new(transformation, scale); + let bytes = bytemuck::bytes_of(&uniforms); - queue.write_buffer( + belt.write_buffer( + encoder, &self.constants_buffer, 0, - bytemuck::bytes_of(&uniforms), - ); + (bytes.len() as u64).try_into().expect("Sized uniforms"), + device, + ) + .copy_from_slice(bytes); - self.solid.prepare(device, queue, &quads.solids); - self.gradient.prepare(device, queue, &quads.gradients); + if !quads.solids.is_empty() { + self.solid.prepare(device, encoder, belt, &quads.solids); + } + + if !quads.gradients.is_empty() { + self.gradient + .prepare(device, encoder, belt, &quads.gradients); + } } } diff --git a/wgpu/src/quad/gradient.rs b/wgpu/src/quad/gradient.rs index 560fcad2..5b32c52a 100644 --- a/wgpu/src/quad/gradient.rs +++ b/wgpu/src/quad/gradient.rs @@ -46,11 +46,12 @@ impl Layer { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, instances: &[Gradient], ) { let _ = self.instances.resize(device, instances.len()); - let _ = self.instances.write(queue, 0, instances); + let _ = self.instances.write(device, encoder, belt, 0, instances); self.instance_count = instances.len(); } diff --git a/wgpu/src/quad/solid.rs b/wgpu/src/quad/solid.rs index 771eee34..1cead367 100644 --- a/wgpu/src/quad/solid.rs +++ b/wgpu/src/quad/solid.rs @@ -40,11 +40,12 @@ impl Layer { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, instances: &[Solid], ) { let _ = self.instances.resize(device, instances.len()); - let _ = self.instances.write(queue, 0, instances); + let _ = self.instances.write(device, encoder, belt, 0, instances); self.instance_count = instances.len(); } diff --git a/wgpu/src/text.rs b/wgpu/src/text.rs index 6fa1922d..97ff77f5 100644 --- a/wgpu/src/text.rs +++ b/wgpu/src/text.rs @@ -53,6 +53,7 @@ impl Pipeline { &mut self, device: &wgpu::Device, queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, sections: &[Text<'_>], layer_bounds: Rectangle, scale_factor: f32, @@ -262,6 +263,7 @@ impl Pipeline { let result = renderer.prepare( device, queue, + encoder, font_system, &mut self.atlas, glyphon::Resolution { diff --git a/wgpu/src/triangle.rs b/wgpu/src/triangle.rs index 2bb6f307..b6be54d4 100644 --- a/wgpu/src/triangle.rs +++ b/wgpu/src/triangle.rs @@ -48,7 +48,8 @@ impl Layer { fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, solid: &solid::Pipeline, gradient: &gradient::Pipeline, meshes: &[Mesh<'_>], @@ -103,33 +104,47 @@ impl Layer { let uniforms = Uniforms::new(transformation * mesh.transformation()); - index_offset += - self.index_buffer.write(queue, index_offset, indices); + index_offset += self.index_buffer.write( + device, + encoder, + belt, + index_offset, + indices, + ); + self.index_strides.push(indices.len() as u32); match mesh { Mesh::Solid { buffers, .. } => { solid_vertex_offset += self.solid.vertices.write( - queue, + device, + encoder, + belt, solid_vertex_offset, &buffers.vertices, ); solid_uniform_offset += self.solid.uniforms.write( - queue, + device, + encoder, + belt, solid_uniform_offset, &[uniforms], ); } Mesh::Gradient { buffers, .. } => { gradient_vertex_offset += self.gradient.vertices.write( - queue, + device, + encoder, + belt, gradient_vertex_offset, &buffers.vertices, ); gradient_uniform_offset += self.gradient.uniforms.write( - queue, + device, + encoder, + belt, gradient_uniform_offset, &[uniforms], ); @@ -237,7 +252,8 @@ impl Pipeline { pub fn prepare( &mut self, device: &wgpu::Device, - queue: &wgpu::Queue, + encoder: &mut wgpu::CommandEncoder, + belt: &mut wgpu::util::StagingBelt, meshes: &[Mesh<'_>], transformation: Transformation, ) { @@ -252,7 +268,8 @@ impl Pipeline { let layer = &mut self.layers[self.prepare_layer]; layer.prepare( device, - queue, + encoder, + belt, &self.solid, &self.gradient, meshes, diff --git a/wgpu/src/window/compositor.rs b/wgpu/src/window/compositor.rs index 9a3e3b34..482d705b 100644 --- a/wgpu/src/window/compositor.rs +++ b/wgpu/src/window/compositor.rs @@ -243,6 +243,7 @@ pub fn present>( // Submit work let _submission = compositor.queue.submit(Some(encoder.finish())); + backend.recall(); frame.present(); Ok(()) -- cgit From 0a97b9e37ae115bb0db33193c8a6b62590a3cd2c Mon Sep 17 00:00:00 2001 From: Héctor Ramón Jiménez Date: Fri, 29 Mar 2024 09:57:11 +0100 Subject: Add documentation to `Backend::recall` in `iced_wgpu` --- wgpu/src/backend.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'wgpu/src') diff --git a/wgpu/src/backend.rs b/wgpu/src/backend.rs index 129e9bca..20809373 100644 --- a/wgpu/src/backend.rs +++ b/wgpu/src/backend.rs @@ -131,7 +131,10 @@ impl Backend { self.image_pipeline.end_frame(); } + /// Recalls staging memory for future uploads. /// + /// This method should be called after the command encoder + /// has been submitted. pub fn recall(&mut self) { self.staging_belt.recall(); } -- cgit From 5f1eb43161d70b4ef157aae1ebc2b5fb25eb5b27 Mon Sep 17 00:00:00 2001 From: Héctor Ramón Jiménez Date: Fri, 29 Mar 2024 14:29:31 +0100 Subject: Split big `Buffer` writes into multiple chunks --- wgpu/src/backend.rs | 5 ++++- wgpu/src/buffer.rs | 42 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'wgpu/src') diff --git a/wgpu/src/backend.rs b/wgpu/src/backend.rs index 20809373..6ccf4111 100644 --- a/wgpu/src/backend.rs +++ b/wgpu/src/backend.rs @@ -1,3 +1,4 @@ +use crate::buffer; use crate::core::{Color, Size, Transformation}; use crate::graphics::backend; use crate::graphics::color; @@ -66,7 +67,9 @@ impl Backend { // TODO: Resize belt smartly (?) // It would be great if the `StagingBelt` API exposed methods // for introspection to detect when a resize may be worth it. - staging_belt: wgpu::util::StagingBelt::new(1024 * 100), + staging_belt: wgpu::util::StagingBelt::new( + buffer::MAX_WRITE_SIZE as u64, + ), } } diff --git a/wgpu/src/buffer.rs b/wgpu/src/buffer.rs index f8828d46..c9d6b828 100644 --- a/wgpu/src/buffer.rs +++ b/wgpu/src/buffer.rs @@ -1,6 +1,8 @@ use std::marker::PhantomData; use std::ops::RangeBounds; +pub const MAX_WRITE_SIZE: usize = 1024 * 100; + #[derive(Debug)] pub struct Buffer { label: &'static str, @@ -69,14 +71,38 @@ impl Buffer { ) -> usize { let bytes: &[u8] = bytemuck::cast_slice(contents); - belt.write_buffer( - encoder, - &self.raw, - offset as u64, - (bytes.len() as u64).try_into().expect("Non-empty write"), - device, - ) - .copy_from_slice(bytes); + if bytes.len() <= MAX_WRITE_SIZE { + belt.write_buffer( + encoder, + &self.raw, + offset as u64, + (bytes.len() as u64).try_into().expect("Non-empty write"), + device, + ) + .copy_from_slice(bytes); + } else { + let mut bytes_written = 0; + + let bytes_per_chunk = (bytes.len().min(MAX_WRITE_SIZE) as u64) + .try_into() + .expect("Non-empty write"); + + while bytes_written < bytes.len() { + belt.write_buffer( + encoder, + &self.raw, + (offset + bytes_written) as u64, + bytes_per_chunk, + device, + ) + .copy_from_slice( + &bytes[bytes_written + ..bytes_written + bytes_per_chunk.get() as usize], + ); + + bytes_written += bytes_per_chunk.get() as usize; + } + } self.offsets.push(offset as u64); -- cgit From 35af0aa84f76daddbb6d6959f9746bd09e306278 Mon Sep 17 00:00:00 2001 From: Héctor Ramón Jiménez Date: Sat, 30 Mar 2024 13:50:40 +0100 Subject: Fix batched writes logic in `iced_wgpu::buffer` --- wgpu/src/buffer.rs | 59 +++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 27 deletions(-) (limited to 'wgpu/src') diff --git a/wgpu/src/buffer.rs b/wgpu/src/buffer.rs index c9d6b828..463ea24a 100644 --- a/wgpu/src/buffer.rs +++ b/wgpu/src/buffer.rs @@ -1,7 +1,12 @@ use std::marker::PhantomData; +use std::num::NonZeroU64; use std::ops::RangeBounds; -pub const MAX_WRITE_SIZE: usize = 1024 * 100; +pub const MAX_WRITE_SIZE: usize = 100 * 1024; + +#[allow(unsafe_code)] +const MAX_WRITE_SIZE_U64: NonZeroU64 = + unsafe { NonZeroU64::new_unchecked(MAX_WRITE_SIZE as u64) }; #[derive(Debug)] pub struct Buffer { @@ -70,40 +75,40 @@ impl Buffer { contents: &[T], ) -> usize { let bytes: &[u8] = bytemuck::cast_slice(contents); + let mut bytes_written = 0; - if bytes.len() <= MAX_WRITE_SIZE { + // Split write into multiple chunks if necessary + while bytes_written + MAX_WRITE_SIZE < bytes.len() { belt.write_buffer( encoder, &self.raw, - offset as u64, - (bytes.len() as u64).try_into().expect("Non-empty write"), + (offset + bytes_written) as u64, + MAX_WRITE_SIZE_U64, device, ) - .copy_from_slice(bytes); - } else { - let mut bytes_written = 0; - - let bytes_per_chunk = (bytes.len().min(MAX_WRITE_SIZE) as u64) - .try_into() - .expect("Non-empty write"); - - while bytes_written < bytes.len() { - belt.write_buffer( - encoder, - &self.raw, - (offset + bytes_written) as u64, - bytes_per_chunk, - device, - ) - .copy_from_slice( - &bytes[bytes_written - ..bytes_written + bytes_per_chunk.get() as usize], - ); - - bytes_written += bytes_per_chunk.get() as usize; - } + .copy_from_slice( + &bytes[bytes_written..bytes_written + MAX_WRITE_SIZE], + ); + + bytes_written += MAX_WRITE_SIZE; } + // There will always be some bytes left, since the previous + // loop guarantees `bytes_written < bytes.len()` + let bytes_left = ((bytes.len() - bytes_written) as u64) + .try_into() + .expect("non-empty write"); + + // Write them + belt.write_buffer( + encoder, + &self.raw, + (offset + bytes_written) as u64, + bytes_left, + device, + ) + .copy_from_slice(&bytes[bytes_written..]); + self.offsets.push(offset as u64); bytes.len() -- cgit