wgpu/util/belt.rs
use crate::{
    util::align_to, Buffer, BufferAddress, BufferDescriptor, BufferSize, BufferSlice, BufferUsages,
    BufferViewMut, CommandEncoder, Device, MapMode,
};
use alloc::vec::Vec;
use core::fmt;
use std::sync::mpsc;
use wgt::Features;

use crate::COPY_BUFFER_ALIGNMENT;

/// Efficiently performs many buffer writes by sharing and reusing temporary buffers.
///
/// Internally it uses a ring-buffer of staging buffers that are sub-allocated.
/// Its advantage over [`Queue::write_buffer_with()`] is that the individual allocations
/// are cheaper; `StagingBelt` is most useful when you are writing very many small pieces
/// of data. It can be understood as a sort of arena allocator.
///
/// Using a staging belt is slightly complicated, and generally goes as follows
/// (see the sketch after this list):
/// 1. Use [`StagingBelt::write_buffer()`] or [`StagingBelt::allocate()`] to allocate
///    buffer slices, then write your data to them.
/// 2. Call [`StagingBelt::finish()`].
/// 3. Submit all command encoders that were used in step 1.
/// 4. Call [`StagingBelt::recall()`].
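///
/// A minimal sketch of that sequence, assuming `device`, `queue`, `encoder`, and a
/// `COPY_DST` buffer `target` already exist in your code:
///
/// ```no_run
/// # fn example(
/// #     device: &wgpu::Device,
/// #     queue: &wgpu::Queue,
/// #     mut encoder: wgpu::CommandEncoder,
/// #     target: &wgpu::Buffer,
/// # ) {
/// use wgpu::util::StagingBelt;
///
/// let mut belt = StagingBelt::new(device.clone(), 1024);
/// // 1. Allocate a slice of the belt and write data into it.
/// belt.write_buffer(&mut encoder, target, 0, wgpu::BufferSize::new(16).unwrap())
///     .copy_from_slice(&[0u8; 16]);
/// // 2. Close the staging buffers used since the last `finish()`.
/// belt.finish();
/// // 3. Submit the command encoder(s) used in step 1.
/// queue.submit([encoder.finish()]);
/// // 4. Recall the staging buffers so they can be reused.
/// belt.recall();
/// # }
/// ```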
25///
26/// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with
pub struct StagingBelt {
    device: Device,
    chunk_size: BufferAddress,
    /// User-specified [`BufferUsages`] with which the chunk buffers are created.
    ///
    /// [`new`](Self::new) guarantees that this always contains
    /// [`MAP_WRITE`](BufferUsages::MAP_WRITE).
    buffer_usages: BufferUsages,
    /// Chunks into which we are accumulating data to be transferred.
    active_chunks: Vec<Chunk>,
    /// Chunks that have scheduled transfers already; they are unmapped and some
    /// command encoder has one or more commands with them as source.
    closed_chunks: Vec<Chunk>,
    /// Chunks that are back from the GPU and ready to be mapped for write and put
    /// into `active_chunks`.
    free_chunks: Vec<Chunk>,
    /// When closed chunks are mapped again, the map callback sends them here.
    sender: Exclusive<mpsc::Sender<Chunk>>,
    /// Free chunks are received here to be put on `self.free_chunks`.
    receiver: Exclusive<mpsc::Receiver<Chunk>>,
}

impl StagingBelt {
    /// Create a new staging belt.
    ///
    /// The `chunk_size` is the unit of internal buffer allocation; writes will be
    /// sub-allocated within each chunk. Therefore, for optimal use of memory, the
    /// chunk size should be:
    ///
    /// * larger than the largest single [`StagingBelt::write_buffer()`] operation;
    /// * 1/4 to 1 times the total amount of data uploaded per submission
    ///   (per [`StagingBelt::finish()`]); and
    /// * bigger is better, within these bounds.
    ///
    /// The buffers returned by this [`StagingBelt`] will have the buffer usages
    /// [`COPY_SRC | MAP_WRITE`](crate::BufferUsages).
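    ///
    /// A minimal construction sketch, assuming a `device` is in scope; the 64 KiB chunk
    /// size is an arbitrary choice for illustration:
    ///
    /// ```no_run
    /// # fn example(device: &wgpu::Device) {
    /// let belt = wgpu::util::StagingBelt::new(device.clone(), 1 << 16);
    /// # let _ = belt;
    /// # }
    /// ```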
    pub fn new(device: Device, chunk_size: BufferAddress) -> Self {
        Self::new_with_buffer_usages(device, chunk_size, BufferUsages::COPY_SRC)
    }

    /// Create a new staging belt.
    ///
    /// The `chunk_size` is the unit of internal buffer allocation; writes will be
    /// sub-allocated within each chunk. Therefore, for optimal use of memory, the
    /// chunk size should be:
    ///
    /// * larger than the largest single [`StagingBelt::write_buffer()`] operation;
    /// * 1/4 to 1 times the total amount of data uploaded per submission
    ///   (per [`StagingBelt::finish()`]); and
    /// * bigger is better, within these bounds.
    ///
    /// `buffer_usages` specifies the [`BufferUsages`] the staging buffers
    /// will be created with. [`MAP_WRITE`](BufferUsages::MAP_WRITE) will be added
    /// automatically. The method will panic if the combination of usages is not
    /// supported. Because [`MAP_WRITE`](BufferUsages::MAP_WRITE) is implied, the allowed
    /// usages depend on whether [`Features::MAPPABLE_PRIMARY_BUFFERS`] is enabled:
    /// - If enabled: any usage is valid.
    /// - If disabled: only [`COPY_SRC`](BufferUsages::COPY_SRC) can be used.
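    ///
    /// A sketch of requesting an extra usage, assuming a `device` whose enabled features
    /// include [`Features::MAPPABLE_PRIMARY_BUFFERS`] (the chunk size and `VERTEX` usage
    /// are arbitrary choices for illustration):
    ///
    /// ```no_run
    /// # fn example(device: &wgpu::Device) {
    /// let belt = wgpu::util::StagingBelt::new_with_buffer_usages(
    ///     device.clone(),
    ///     1 << 16,
    ///     wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::VERTEX,
    /// );
    /// # let _ = belt;
    /// # }
    /// ```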
    #[track_caller]
    pub fn new_with_buffer_usages(
        device: Device,
        chunk_size: BufferAddress,
        mut buffer_usages: BufferUsages,
    ) -> Self {
        let (sender, receiver) = mpsc::channel();

        // make sure anything other than MAP_WRITE | COPY_SRC is only allowed with MAPPABLE_PRIMARY_BUFFERS.
        let extra_usages =
            buffer_usages.difference(BufferUsages::MAP_WRITE | BufferUsages::COPY_SRC);
        if !extra_usages.is_empty()
            && !device
                .features()
                .contains(Features::MAPPABLE_PRIMARY_BUFFERS)
        {
            panic!("Only BufferUsages::COPY_SRC may be used when Features::MAPPABLE_PRIMARY_BUFFERS is not enabled. Specified buffer usages: {buffer_usages:?}");
        }
        // always set MAP_WRITE
        buffer_usages.insert(BufferUsages::MAP_WRITE);

        StagingBelt {
            device,
            chunk_size,
            buffer_usages,
            active_chunks: Vec::new(),
            closed_chunks: Vec::new(),
            free_chunks: Vec::new(),
            sender: Exclusive::new(sender),
            receiver: Exclusive::new(receiver),
        }
    }

    /// Allocate a staging belt slice of `size` to be copied into the `target` buffer
    /// at the specified offset.
    ///
    /// `offset` and `size` must be multiples of [`COPY_BUFFER_ALIGNMENT`]
    /// (as is required by the underlying buffer operations).
    ///
    /// The upload will be placed into the provided command encoder. This encoder
    /// must be submitted after [`StagingBelt::finish()`] is called and before
    /// [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the size of any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
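    ///
    /// A minimal sketch of writing 16 bytes at offset 0, assuming `belt`, `encoder`, and a
    /// `COPY_DST` buffer `target` are in scope:
    ///
    /// ```no_run
    /// # fn example(
    /// #     belt: &mut wgpu::util::StagingBelt,
    /// #     encoder: &mut wgpu::CommandEncoder,
    /// #     target: &wgpu::Buffer,
    /// # ) {
    /// belt.write_buffer(encoder, target, 0, wgpu::BufferSize::new(16).unwrap())
    ///     .copy_from_slice(&[0u8; 16]);
    /// # }
    /// ```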
    #[track_caller]
    pub fn write_buffer(
        &mut self,
        encoder: &mut CommandEncoder,
        target: &Buffer,
        offset: BufferAddress,
        size: BufferSize,
    ) -> BufferViewMut {
        // Asserting this explicitly gives a usefully more specific, and more prompt, error than
        // leaving it to regular API validation.
        // We check only `offset`, not `size`, because `self.allocate()` will check the size.
        assert!(
            offset.is_multiple_of(COPY_BUFFER_ALIGNMENT),
            "StagingBelt::write_buffer() offset {offset} must be a multiple of `COPY_BUFFER_ALIGNMENT`"
        );

        let slice_of_belt = self.allocate(
            size,
            const { BufferSize::new(crate::COPY_BUFFER_ALIGNMENT).unwrap() },
        );
        encoder.copy_buffer_to_buffer(
            slice_of_belt.buffer(),
            slice_of_belt.offset(),
            target,
            offset,
            size.get(),
        );
        slice_of_belt.get_mapped_range_mut()
    }

    /// Allocate a staging belt slice with the given `size` and `alignment` and return it.
    ///
    /// `size` must be a multiple of [`COPY_BUFFER_ALIGNMENT`]
    /// (as is required by the underlying buffer operations).
    ///
    /// To use this slice, call [`BufferSlice::get_mapped_range_mut()`] and write your data into
    /// that [`BufferViewMut`].
    /// (The view must be dropped before [`StagingBelt::finish()`] is called.)
    ///
    /// You can then record your own GPU commands to perform with the slice,
    /// such as copying it to a texture (whereas
    /// [`StagingBelt::write_buffer()`] can only write to other buffers).
    /// All commands involving this slice must be submitted after
    /// [`StagingBelt::finish()`] is called and before [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the space available in any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
    ///
    /// The chosen slice will be positioned within the buffer at a multiple of `alignment`,
    /// which may be used to meet alignment requirements for the operation you wish to perform
    /// with the slice. This does not necessarily affect the alignment of the [`BufferViewMut`].
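    ///
    /// A minimal sketch, assuming `belt` is in scope; the 256-byte size and
    /// [`COPY_BUFFER_ALIGNMENT`] alignment are arbitrary choices for illustration:
    ///
    /// ```no_run
    /// # fn example(belt: &mut wgpu::util::StagingBelt) {
    /// let slice = belt.allocate(
    ///     wgpu::BufferSize::new(256).unwrap(),
    ///     wgpu::BufferSize::new(wgpu::COPY_BUFFER_ALIGNMENT).unwrap(),
    /// );
    /// // Fill the mapped view with data; the view must be dropped before `finish()`.
    /// slice.get_mapped_range_mut().fill(0);
    /// // `slice.buffer()` and `slice.offset()` can now be used in your own copy commands.
    /// # }
    /// ```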
    #[track_caller]
    pub fn allocate(&mut self, size: BufferSize, alignment: BufferSize) -> BufferSlice<'_> {
        assert!(
            size.get().is_multiple_of(COPY_BUFFER_ALIGNMENT),
            "StagingBelt allocation size {size} must be a multiple of `COPY_BUFFER_ALIGNMENT`"
        );
        assert!(
            alignment.get().is_power_of_two(),
            "alignment must be a power of two, not {alignment}"
        );
        // At minimum, we must have alignment sufficient to map the buffer.
        let alignment = alignment.get().max(crate::MAP_ALIGNMENT);

        let mut chunk = if let Some(index) = self
            .active_chunks
            .iter()
            .position(|chunk| chunk.can_allocate(size, alignment))
        {
            self.active_chunks.swap_remove(index)
        } else {
            self.receive_chunks(); // ensure self.free_chunks is up to date

            if let Some(index) = self
                .free_chunks
                .iter()
                .position(|chunk| chunk.can_allocate(size, alignment))
            {
                self.free_chunks.swap_remove(index)
            } else {
                Chunk {
                    buffer: self.device.create_buffer(&BufferDescriptor {
                        label: Some("(wgpu internal) StagingBelt staging buffer"),
                        size: self.chunk_size.max(size.get()),
                        usage: self.buffer_usages,
                        mapped_at_creation: true,
                    }),
                    offset: 0,
                }
            }
        };

        let allocation_offset = chunk.allocate(size, alignment);

        self.active_chunks.push(chunk);
        let chunk = self.active_chunks.last().unwrap();

        chunk
            .buffer
            .slice(allocation_offset..allocation_offset + size.get())
    }

    /// Prepare currently mapped buffers for use in a submission.
    ///
    /// This must be called before the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted.
    ///
    /// At this point, all the partially used staging buffers are closed (cannot be used for
    /// further writes) until after [`StagingBelt::recall()`] is called *and* the GPU is done
    /// copying the data from them.
    pub fn finish(&mut self) {
        for chunk in self.active_chunks.drain(..) {
            chunk.buffer.unmap();
            self.closed_chunks.push(chunk);
        }
    }

    /// Recall all of the closed buffers back to be reused.
    ///
    /// This must only be called after the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted. Additional calls are harmless.
    /// Not calling this as soon as possible may result in increased buffer memory usage.
    pub fn recall(&mut self) {
        self.receive_chunks();

        for chunk in self.closed_chunks.drain(..) {
            let sender = self.sender.get_mut().clone();
            chunk
                .buffer
                .clone()
                .slice(..)
                .map_async(MapMode::Write, move |_| {
                    let _ = sender.send(chunk);
                });
        }
    }

    /// Move all chunks that the GPU is done with (and are now mapped again)
    /// from `self.receiver` to `self.free_chunks`.
    fn receive_chunks(&mut self) {
        while let Ok(mut chunk) = self.receiver.get_mut().try_recv() {
            chunk.offset = 0;
            self.free_chunks.push(chunk);
        }
    }
}

impl fmt::Debug for StagingBelt {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            device,
            chunk_size,
            buffer_usages,
            active_chunks,
            closed_chunks,
            free_chunks,
            sender: _,
            receiver: _,
        } = self;
        f.debug_struct("StagingBelt")
            .field("device", device)
            .field("chunk_size", chunk_size)
            .field("buffer_usages", buffer_usages)
            .field("active_chunks", &active_chunks.len())
            .field("closed_chunks", &closed_chunks.len())
            .field("free_chunks", &free_chunks.len())
            .finish_non_exhaustive()
    }
}

/// A staging buffer chunk: the buffer itself plus the offset of the first byte
/// not yet allocated from it.
struct Chunk {
    buffer: Buffer,
    offset: BufferAddress,
}

impl Chunk {
    /// Returns whether an allocation of `size` bytes aligned to `alignment` fits in the
    /// remaining space of this chunk.
    fn can_allocate(&self, size: BufferSize, alignment: BufferAddress) -> bool {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        alloc_end <= self.buffer.size()
    }

    /// Reserves `size` bytes aligned to `alignment` and returns the offset at which the
    /// allocation starts.
    fn allocate(&mut self, size: BufferSize, alignment: BufferAddress) -> BufferAddress {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        assert!(alloc_end <= self.buffer.size());
        self.offset = alloc_end;
        alloc_start
    }
}

use exclusive::Exclusive;
mod exclusive {
    /// `Sync` wrapper that works by providing only exclusive access.
    ///
    /// See <https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html>
    pub(super) struct Exclusive<T>(T);

    /// Safety: `&Exclusive` has no operations.
    unsafe impl<T> Sync for Exclusive<T> {}

    impl<T> Exclusive<T> {
        pub fn new(value: T) -> Self {
            Self(value)
        }

        pub fn get_mut(&mut self) -> &mut T {
            &mut self.0
        }
    }
344}