// wgpu/util/belt.rs

use crate::{
    util::align_to, Buffer, BufferAddress, BufferDescriptor, BufferSize, BufferSlice, BufferUsages,
    BufferViewMut, CommandEncoder, Device, MapMode,
};
use alloc::vec::Vec;
use core::fmt;
use std::sync::mpsc;
use wgt::Features;

use crate::COPY_BUFFER_ALIGNMENT;

/// Efficiently performs many buffer writes by sharing and reusing temporary buffers.
///
/// Internally it uses a ring-buffer of staging buffers that are sub-allocated.
/// Its advantage over [`Queue::write_buffer_with()`] is that the individual allocations
/// are cheaper; `StagingBelt` is most useful when you are writing very many small pieces
/// of data. It can be understood as a sort of arena allocator.
///
/// Using a staging belt is slightly complicated, and generally goes as follows:
/// 1. Use [`StagingBelt::write_buffer()`] or [`StagingBelt::allocate()`] to allocate
///    buffer slices, then write your data to them.
/// 2. Call [`StagingBelt::finish()`].
/// 3. Submit all command encoders that were used in step 1.
/// 4. Call [`StagingBelt::recall()`].
///
/// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with
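///
/// # Example
///
/// A minimal sketch of the cycle above, assuming a `device`, `queue`, and
/// `target` buffer (created with [`COPY_DST`](BufferUsages::COPY_DST)) are
/// already set up; the names here are placeholders, not part of this API:
///
/// ```no_run
/// # fn example(device: &wgpu::Device, queue: &wgpu::Queue, target: &wgpu::Buffer) {
/// use wgpu::util::StagingBelt;
/// use wgpu::BufferSize;
///
/// let mut belt = StagingBelt::new(device.clone(), 1024);
/// let mut encoder =
///     device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
///
/// // 1. Allocate a slice of the belt and write data into it.
/// //    (The returned view is dropped at the end of this statement.)
/// belt.write_buffer(&mut encoder, target, 0, BufferSize::new(16).unwrap())
///     .copy_from_slice(&[0u8; 16]);
///
/// // 2. Close the staging buffers for this submission.
/// belt.finish();
///
/// // 3. Submit all command encoders used in step 1.
/// queue.submit([encoder.finish()]);
///
/// // 4. Make the staging buffers available for reuse.
/// belt.recall();
/// # }
/// ```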
pub struct StagingBelt {
    device: Device,
    chunk_size: BufferAddress,
    /// User-specified [`BufferUsages`] with which the chunk buffers are created.
    ///
    /// [`new`](Self::new) guarantees that this always contains
    /// [`MAP_WRITE`](BufferUsages::MAP_WRITE).
    buffer_usages: BufferUsages,
    /// Chunks into which we are accumulating data to be transferred.
    active_chunks: Vec<Chunk>,
    /// Chunks that have scheduled transfers already; they are unmapped and some
    /// command encoder has one or more commands with them as source.
    closed_chunks: Vec<Chunk>,
    /// Chunks that are back from the GPU and ready to be mapped for write and put
    /// into `active_chunks`.
    free_chunks: Vec<Chunk>,
    /// When closed chunks are mapped again, the map callback sends them here.
    sender: Exclusive<mpsc::Sender<Chunk>>,
    /// Free chunks are received here to be put on `self.free_chunks`.
    receiver: Exclusive<mpsc::Receiver<Chunk>>,
}

impl StagingBelt {
    /// Create a new staging belt.
    ///
    /// The `chunk_size` is the unit of internal buffer allocation; writes will be
    /// sub-allocated within each chunk. Therefore, for optimal use of memory, the
    /// chunk size should be:
    ///
    /// * larger than the largest single [`StagingBelt::write_buffer()`] operation;
    /// * between one quarter and the whole of the total amount of data uploaded
    ///   per submission (per [`StagingBelt::finish()`]); and
    /// * bigger is better, within these bounds.
    ///
    /// The staging buffers created by this [`StagingBelt`] will have the buffer usages
    /// [`COPY_SRC | MAP_WRITE`](crate::BufferUsages).
    pub fn new(device: Device, chunk_size: BufferAddress) -> Self {
        Self::new_with_buffer_usages(device, chunk_size, BufferUsages::COPY_SRC)
    }

    /// Create a new staging belt.
    ///
    /// The `chunk_size` is the unit of internal buffer allocation; writes will be
    /// sub-allocated within each chunk. Therefore, for optimal use of memory, the
    /// chunk size should be:
    ///
    /// * larger than the largest single [`StagingBelt::write_buffer()`] operation;
    /// * between one quarter and the whole of the total amount of data uploaded
    ///   per submission (per [`StagingBelt::finish()`]); and
    /// * bigger is better, within these bounds.
    ///
    /// `buffer_usages` specifies the [`BufferUsages`] the staging buffers
    /// will be created with. [`MAP_WRITE`](BufferUsages::MAP_WRITE) will be added
    /// automatically. The method will panic if the combination of usages is not
    /// supported. Because [`MAP_WRITE`](BufferUsages::MAP_WRITE) is implied, the
    /// allowed usages depend on whether [`Features::MAPPABLE_PRIMARY_BUFFERS`] is enabled:
    /// - If enabled: any usage is valid.
    /// - If disabled: only [`COPY_SRC`](BufferUsages::COPY_SRC) may be used.
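    ///
    /// # Example
    ///
    /// A sketch of creating a belt whose staging buffers can also be bound as
    /// storage buffers; per the rules above, this combination requires
    /// [`Features::MAPPABLE_PRIMARY_BUFFERS`] to be enabled on the device:
    ///
    /// ```no_run
    /// # fn make_belt(device: &wgpu::Device) -> wgpu::util::StagingBelt {
    /// use wgpu::BufferUsages;
    ///
    /// // MAP_WRITE is inserted automatically; the extra STORAGE usage panics
    /// // unless Features::MAPPABLE_PRIMARY_BUFFERS is enabled.
    /// wgpu::util::StagingBelt::new_with_buffer_usages(
    ///     device.clone(),
    ///     4096,
    ///     BufferUsages::COPY_SRC | BufferUsages::STORAGE,
    /// )
    /// # }
    /// ```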
    #[track_caller]
    pub fn new_with_buffer_usages(
        device: Device,
        chunk_size: BufferAddress,
        mut buffer_usages: BufferUsages,
    ) -> Self {
        let (sender, receiver) = mpsc::channel();

        // Make sure anything other than MAP_WRITE | COPY_SRC is only allowed with MAPPABLE_PRIMARY_BUFFERS.
        let extra_usages =
            buffer_usages.difference(BufferUsages::MAP_WRITE | BufferUsages::COPY_SRC);
        if !extra_usages.is_empty()
            && !device
                .features()
                .contains(Features::MAPPABLE_PRIMARY_BUFFERS)
        {
            panic!("Only BufferUsages::COPY_SRC may be used when Features::MAPPABLE_PRIMARY_BUFFERS is not enabled. Specified buffer usages: {buffer_usages:?}");
        }
        // Always set MAP_WRITE.
        buffer_usages.insert(BufferUsages::MAP_WRITE);

        StagingBelt {
            device,
            chunk_size,
            buffer_usages,
            active_chunks: Vec::new(),
            closed_chunks: Vec::new(),
            free_chunks: Vec::new(),
            sender: Exclusive::new(sender),
            receiver: Exclusive::new(receiver),
        }
    }

    /// Allocate a staging belt slice of `size` to be copied into the `target` buffer
    /// at the specified offset.
    ///
    /// `offset` and `size` must be multiples of [`COPY_BUFFER_ALIGNMENT`]
    /// (as is required by the underlying buffer operations).
    ///
    /// The upload will be placed into the provided command encoder. This encoder
    /// must be submitted after [`StagingBelt::finish()`] is called and before
    /// [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the size of any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
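    ///
    /// # Example
    ///
    /// A sketch of staging 16 bytes into `target`, assuming `target` was created
    /// with [`COPY_DST`](BufferUsages::COPY_DST) and that `encoder` is submitted
    /// between [`StagingBelt::finish()`] and [`StagingBelt::recall()`]:
    ///
    /// ```no_run
    /// # fn example(
    /// #     belt: &mut wgpu::util::StagingBelt,
    /// #     encoder: &mut wgpu::CommandEncoder,
    /// #     target: &wgpu::Buffer,
    /// # ) {
    /// use wgpu::BufferSize;
    ///
    /// // The returned view is mapped staging memory; fill it with the bytes to
    /// // upload. It must be dropped before `StagingBelt::finish()` is called.
    /// belt.write_buffer(encoder, target, 0, BufferSize::new(16).unwrap())
    ///     .copy_from_slice(&[0u8; 16]);
    /// # }
    /// ```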
    #[track_caller]
    pub fn write_buffer(
        &mut self,
        encoder: &mut CommandEncoder,
        target: &Buffer,
        offset: BufferAddress,
        size: BufferSize,
    ) -> BufferViewMut {
        // Asserting this explicitly gives a usefully more specific, and more prompt, error than
        // leaving it to regular API validation.
        // We check only `offset`, not `size`, because `self.allocate()` will check the size.
        assert!(
            offset.is_multiple_of(COPY_BUFFER_ALIGNMENT),
            "StagingBelt::write_buffer() offset {offset} must be a multiple of `COPY_BUFFER_ALIGNMENT`"
        );

        let slice_of_belt = self.allocate(
            size,
            const { BufferSize::new(crate::COPY_BUFFER_ALIGNMENT).unwrap() },
        );
        encoder.copy_buffer_to_buffer(
            slice_of_belt.buffer(),
            slice_of_belt.offset(),
            target,
            offset,
            size.get(),
        );
        slice_of_belt
            .get_mapped_range_mut()
            .expect("Failed to get mapped range for staging belt buffer")
    }

    /// Allocate a staging belt slice with the given `size` and `alignment` and return it.
    ///
    /// `size` must be a multiple of [`COPY_BUFFER_ALIGNMENT`]
    /// (as is required by the underlying buffer operations).
    ///
    /// To use this slice, call [`BufferSlice::get_mapped_range_mut()`] and write your data into
    /// that [`BufferViewMut`].
    /// (The view must be dropped before [`StagingBelt::finish()`] is called.)
    ///
    /// You can then record your own GPU commands to perform with the slice,
    /// such as copying it to a texture (whereas
    /// [`StagingBelt::write_buffer()`] can only write to other buffers).
    /// All commands involving this slice must be submitted after
    /// [`StagingBelt::finish()`] is called and before [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the space available in any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
    ///
    /// The chosen slice will be positioned within the buffer at a multiple of `alignment`,
    /// which may be used to meet alignment requirements for the operation you wish to perform
    /// with the slice. This does not necessarily affect the alignment of the [`BufferViewMut`].
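    ///
    /// # Example
    ///
    /// A sketch of allocating staging space, writing into it, and recording a
    /// copy from it by hand (roughly what [`StagingBelt::write_buffer()`] does),
    /// assuming `target` was created with [`COPY_DST`](BufferUsages::COPY_DST):
    ///
    /// ```no_run
    /// # fn example(
    /// #     belt: &mut wgpu::util::StagingBelt,
    /// #     encoder: &mut wgpu::CommandEncoder,
    /// #     target: &wgpu::Buffer,
    /// # ) {
    /// use wgpu::BufferSize;
    ///
    /// let size = BufferSize::new(256).unwrap();
    /// let slice = belt.allocate(size, BufferSize::new(wgpu::COPY_BUFFER_ALIGNMENT).unwrap());
    ///
    /// // Write into the mapped slice; the view is dropped at the end of the statement.
    /// slice
    ///     .get_mapped_range_mut()
    ///     .expect("staging belt slice should be mapped")
    ///     .fill(0);
    ///
    /// // Record any command that reads the slice, e.g. a plain buffer copy.
    /// encoder.copy_buffer_to_buffer(slice.buffer(), slice.offset(), target, 0, size.get());
    /// # }
    /// ```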
    #[track_caller]
    pub fn allocate(&mut self, size: BufferSize, alignment: BufferSize) -> BufferSlice<'_> {
        assert!(
            size.get().is_multiple_of(COPY_BUFFER_ALIGNMENT),
            "StagingBelt allocation size {size} must be a multiple of `COPY_BUFFER_ALIGNMENT`"
        );
        assert!(
            alignment.get().is_power_of_two(),
            "alignment must be a power of two, not {alignment}"
        );
        // At minimum, we must have alignment sufficient to map the buffer.
        let alignment = alignment.get().max(crate::MAP_ALIGNMENT);

        let mut chunk = if let Some(index) = self
            .active_chunks
            .iter()
            .position(|chunk| chunk.can_allocate(size, alignment))
        {
            self.active_chunks.swap_remove(index)
        } else {
            self.receive_chunks(); // ensure self.free_chunks is up to date

            if let Some(index) = self
                .free_chunks
                .iter()
                .position(|chunk| chunk.can_allocate(size, alignment))
            {
                self.free_chunks.swap_remove(index)
            } else {
                Chunk {
                    buffer: self.device.create_buffer(&BufferDescriptor {
                        label: Some("(wgpu internal) StagingBelt staging buffer"),
                        size: self.chunk_size.max(size.get()),
                        usage: self.buffer_usages,
                        mapped_at_creation: true,
                    }),
                    offset: 0,
                }
            }
        };

        let allocation_offset = chunk.allocate(size, alignment);

        self.active_chunks.push(chunk);
        let chunk = self.active_chunks.last().unwrap();

        chunk
            .buffer
            .slice(allocation_offset..allocation_offset + size.get())
    }

    /// Prepare currently mapped buffers for use in a submission.
    ///
    /// This must be called before the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted.
    ///
    /// At this point, all the partially used staging buffers are closed (cannot be used for
    /// further writes) until after [`StagingBelt::recall()`] is called *and* the GPU is done
    /// copying the data from them.
    pub fn finish(&mut self) {
        for chunk in self.active_chunks.drain(..) {
            chunk.buffer.unmap();
            self.closed_chunks.push(chunk);
        }
    }

    /// Recall all of the closed buffers back to be reused.
    ///
    /// This must only be called after the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted. Additional calls are harmless.
    /// Not calling this as soon as possible may result in increased buffer memory usage.
    pub fn recall(&mut self) {
        self.receive_chunks();

        for chunk in self.closed_chunks.drain(..) {
            let sender = self.sender.get_mut().clone();
            chunk
                .buffer
                .clone()
                .slice(..)
                .map_async(MapMode::Write, move |_| {
                    let _ = sender.send(chunk);
                });
        }
    }

    /// Move all chunks that the GPU is done with (and are now mapped again)
    /// from `self.receiver` to `self.free_chunks`.
    fn receive_chunks(&mut self) {
        while let Ok(mut chunk) = self.receiver.get_mut().try_recv() {
            chunk.offset = 0;
            self.free_chunks.push(chunk);
        }
    }
}

impl fmt::Debug for StagingBelt {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            device,
            chunk_size,
            buffer_usages,
            active_chunks,
            closed_chunks,
            free_chunks,
            sender: _,
            receiver: _,
        } = self;
        f.debug_struct("StagingBelt")
            .field("device", device)
            .field("chunk_size", chunk_size)
            .field("buffer_usages", buffer_usages)
            .field("active_chunks", &active_chunks.len())
            .field("closed_chunks", &closed_chunks.len())
            .field("free_chunks", &free_chunks.len())
            .finish_non_exhaustive()
    }
}

struct Chunk {
    buffer: Buffer,
    offset: BufferAddress,
}

impl Chunk {
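    /// Returns whether the remaining space in this chunk can fit a suballocation
    /// of `size` bytes positioned at a multiple of `alignment`.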
    fn can_allocate(&self, size: BufferSize, alignment: BufferAddress) -> bool {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        alloc_end <= self.buffer.size()
    }

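    /// Reserves `size` bytes at a multiple of `alignment` and returns the offset
    /// of the suballocation within this chunk's buffer.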
    fn allocate(&mut self, size: BufferSize, alignment: BufferAddress) -> BufferAddress {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        assert!(alloc_end <= self.buffer.size());
        self.offset = alloc_end;
        alloc_start
    }
}

use exclusive::Exclusive;
mod exclusive {
    /// `Sync` wrapper that works by providing only exclusive access.
    ///
    /// See <https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html>
    pub(super) struct Exclusive<T>(T);

    /// Safety: `&Exclusive` has no operations.
    unsafe impl<T> Sync for Exclusive<T> {}

    impl<T> Exclusive<T> {
        pub fn new(value: T) -> Self {
            Self(value)
        }

        pub fn get_mut(&mut self) -> &mut T {
            &mut self.0
        }
    }
}