wgpu/util/belt.rs
use crate::{
    util::align_to, Buffer, BufferAddress, BufferDescriptor, BufferSize, BufferSlice, BufferUsages,
    BufferViewMut, CommandEncoder, Device, MapMode,
};
use alloc::vec::Vec;
use core::fmt;
use std::sync::mpsc;

/// Efficiently performs many buffer writes by sharing and reusing temporary buffers.
///
/// Internally it uses a ring-buffer of staging buffers that are sub-allocated.
/// Its advantage over [`Queue::write_buffer_with()`] is that the individual allocations
/// are cheaper; `StagingBelt` is most useful when you are writing very many small pieces
/// of data. It can be understood as a sort of arena allocator.
///
/// Using a staging belt is slightly complicated, and generally goes as follows:
/// 1. Use [`StagingBelt::write_buffer()`] or [`StagingBelt::allocate()`] to allocate
///    buffer slices, then write your data to them.
/// 2. Call [`StagingBelt::finish()`].
/// 3. Submit all command encoders that were used in step 1.
/// 4. Call [`StagingBelt::recall()`].
///
/// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with
pub struct StagingBelt {
    chunk_size: BufferAddress,
    /// Chunks into which we are accumulating data to be transferred.
    active_chunks: Vec<Chunk>,
    /// Chunks that have scheduled transfers already; they are unmapped and some
    /// command encoder has one or more commands with them as source.
    closed_chunks: Vec<Chunk>,
    /// Chunks that are back from the GPU and ready to be mapped for write and put
    /// into `active_chunks`.
    free_chunks: Vec<Chunk>,
    /// When closed chunks are mapped again, the map callback sends them here.
    sender: Exclusive<mpsc::Sender<Chunk>>,
    /// Free chunks are received here to be put on `self.free_chunks`.
    receiver: Exclusive<mpsc::Receiver<Chunk>>,
}

impl StagingBelt {
    /// Create a new staging belt.
    ///
    /// The `chunk_size` is the unit of internal buffer allocation; writes will be
    /// sub-allocated within each chunk. Therefore, for optimal use of memory, the
    /// chunk size should be:
    ///
    /// * larger than the largest single [`StagingBelt::write_buffer()`] operation;
    /// * between roughly a quarter of and the full amount of data uploaded per
    ///   submission (per [`StagingBelt::finish()`]); and
    /// * bigger is better, within these bounds.
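    ///
    /// For example, a belt used for many small per-frame uploads might be created with
    /// a chunk size of one mebibyte (an illustrative figure, not a measured
    /// recommendation):
    ///
    /// ```no_run
    /// let belt = wgpu::util::StagingBelt::new(1024 * 1024);
    /// ```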
    pub fn new(chunk_size: BufferAddress) -> Self {
        let (sender, receiver) = mpsc::channel();
        StagingBelt {
            chunk_size,
            active_chunks: Vec::new(),
            closed_chunks: Vec::new(),
            free_chunks: Vec::new(),
            sender: Exclusive::new(sender),
            receiver: Exclusive::new(receiver),
        }
    }

    /// Allocate a staging belt slice of `size` to be copied into the `target` buffer
    /// at the specified offset.
    ///
    /// The upload will be placed into the provided command encoder. This encoder
    /// must be submitted after [`StagingBelt::finish()`] is called and before
    /// [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the size of any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
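    ///
    /// # Example
    ///
    /// A minimal sketch, assuming `belt`, `encoder`, `device`, and a `target` buffer
    /// created with `COPY_DST` usage already exist in the caller's code:
    ///
    /// ```no_run
    /// # fn example(
    /// #     belt: &mut wgpu::util::StagingBelt,
    /// #     encoder: &mut wgpu::CommandEncoder,
    /// #     target: &wgpu::Buffer,
    /// #     device: &wgpu::Device,
    /// # ) {
    /// let data = [0u8; 16];
    /// // Fill the returned mapped view with the bytes to upload; the copy itself is
    /// // recorded into `encoder` and executes when the encoder is submitted.
    /// belt.write_buffer(encoder, target, 0, wgpu::BufferSize::new(16).unwrap(), device)
    ///     .copy_from_slice(&data);
    /// # }
    /// ```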
    pub fn write_buffer(
        &mut self,
        encoder: &mut CommandEncoder,
        target: &Buffer,
        offset: BufferAddress,
        size: BufferSize,
        device: &Device,
    ) -> BufferViewMut<'_> {
        let slice_of_belt = self.allocate(
            size,
            const { BufferSize::new(crate::COPY_BUFFER_ALIGNMENT).unwrap() },
            device,
        );
        encoder.copy_buffer_to_buffer(
            slice_of_belt.buffer(),
            slice_of_belt.offset(),
            target,
            offset,
            size.get(),
        );
        slice_of_belt.get_mapped_range_mut()
    }

    /// Allocate a staging belt slice with the given `size` and `alignment` and return it.
    ///
    /// To use this slice, call [`BufferSlice::get_mapped_range_mut()`] and write your data into
    /// that [`BufferViewMut`].
    /// (The view must be dropped before [`StagingBelt::finish()`] is called.)
    ///
    /// You can then record your own GPU commands to perform with the slice,
    /// such as copying it to a texture or executing a compute shader that reads it (whereas
    /// [`StagingBelt::write_buffer()`] can only write to other buffers).
    /// All commands involving this slice must be submitted after
    /// [`StagingBelt::finish()`] is called and before [`StagingBelt::recall()`] is called.
    ///
    /// If the `size` is greater than the space available in any free internal buffer, a new buffer
    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
    /// should ideally be larger than every such size.
    ///
    /// The chosen slice will be positioned within the buffer at a multiple of `alignment`,
    /// which may be used to meet alignment requirements for the operation you wish to perform
    /// with the slice. This does not necessarily affect the alignment of the [`BufferViewMut`].
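    ///
    /// # Example
    ///
    /// A minimal sketch of allocating a slice and filling it by hand; `belt` and
    /// `device` are assumed to exist, and the 256-byte size and 4-byte alignment are
    /// only illustrative:
    ///
    /// ```no_run
    /// # fn example(belt: &mut wgpu::util::StagingBelt, device: &wgpu::Device) {
    /// let size = wgpu::BufferSize::new(256).unwrap();
    /// let alignment = wgpu::BufferSize::new(wgpu::COPY_BUFFER_ALIGNMENT).unwrap();
    /// let slice = belt.allocate(size, alignment, device);
    /// // Write through the mapped view; the view must be dropped before
    /// // `StagingBelt::finish()` is called.
    /// slice.get_mapped_range_mut().fill(0xFF);
    /// // `slice.buffer()` and `slice.offset()` can now be used to record custom
    /// // commands (for example, a copy to a texture) that read this region.
    /// # }
    /// ```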
    pub fn allocate(
        &mut self,
        size: BufferSize,
        alignment: BufferSize,
        device: &Device,
    ) -> BufferSlice<'_> {
        assert!(
            alignment.get().is_power_of_two(),
            "alignment must be a power of two, not {alignment}"
        );
        // At minimum, we must have alignment sufficient to map the buffer.
        let alignment = alignment.get().max(crate::MAP_ALIGNMENT);

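        // Pick a chunk to sub-allocate from: prefer an active chunk with enough room,
        // then a free (already remapped) chunk, and only then create a new buffer.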
        let mut chunk = if let Some(index) = self
            .active_chunks
            .iter()
            .position(|chunk| chunk.can_allocate(size, alignment))
        {
            self.active_chunks.swap_remove(index)
        } else {
            self.receive_chunks(); // ensure self.free_chunks is up to date

            if let Some(index) = self
                .free_chunks
                .iter()
                .position(|chunk| chunk.can_allocate(size, alignment))
            {
                self.free_chunks.swap_remove(index)
            } else {
                Chunk {
                    buffer: device.create_buffer(&BufferDescriptor {
                        label: Some("(wgpu internal) StagingBelt staging buffer"),
                        size: self.chunk_size.max(size.get()),
                        usage: BufferUsages::MAP_WRITE | BufferUsages::COPY_SRC,
                        mapped_at_creation: true,
                    }),
                    offset: 0,
                }
            }
        };

        let allocation_offset = chunk.allocate(size, alignment);

        self.active_chunks.push(chunk);
        let chunk = self.active_chunks.last().unwrap();

        chunk
            .buffer
            .slice(allocation_offset..allocation_offset + size.get())
    }

    /// Prepare currently mapped buffers for use in a submission.
    ///
    /// This must be called before the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted.
    ///
    /// At this point, all the partially used staging buffers are closed (cannot be used for
    /// further writes) until after [`StagingBelt::recall()`] is called *and* the GPU is done
    /// copying the data from them.
    pub fn finish(&mut self) {
        for chunk in self.active_chunks.drain(..) {
            chunk.buffer.unmap();
            self.closed_chunks.push(chunk);
        }
    }

    /// Recall all of the closed buffers back to be reused.
    ///
    /// This must only be called after the command encoder(s) provided to
    /// [`StagingBelt::write_buffer()`] are submitted. Additional calls are harmless.
    /// Not calling this as soon as possible may result in increased buffer memory usage.
    pub fn recall(&mut self) {
        self.receive_chunks();

        for chunk in self.closed_chunks.drain(..) {
            let sender = self.sender.get_mut().clone();
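            // Clone the buffer handle so the slice below does not borrow `chunk`,
            // which must be moved into the map callback.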
            chunk
                .buffer
                .clone()
                .slice(..)
                .map_async(MapMode::Write, move |_| {
                    let _ = sender.send(chunk);
                });
        }
    }

    /// Move all chunks that the GPU is done with (and are now mapped again)
    /// from `self.receiver` to `self.free_chunks`.
    fn receive_chunks(&mut self) {
        while let Ok(mut chunk) = self.receiver.get_mut().try_recv() {
            chunk.offset = 0;
            self.free_chunks.push(chunk);
        }
    }
}

impl fmt::Debug for StagingBelt {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("StagingBelt")
            .field("chunk_size", &self.chunk_size)
            .field("active_chunks", &self.active_chunks.len())
            .field("closed_chunks", &self.closed_chunks.len())
            .field("free_chunks", &self.free_chunks.len())
            .finish_non_exhaustive()
    }
}

/// A single staging buffer plus a bump-allocation cursor: `offset` marks the start
/// of the not-yet-allocated tail of `buffer`.
struct Chunk {
    buffer: Buffer,
    offset: BufferAddress,
}

impl Chunk {
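    /// Returns whether `size` bytes, placed at the next multiple of `alignment`,
    /// would still fit within this chunk's buffer.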
    fn can_allocate(&self, size: BufferSize, alignment: BufferAddress) -> bool {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        alloc_end <= self.buffer.size()
    }

    /// Reserves `size` bytes at the next multiple of `alignment` and returns the
    /// start offset of the reservation within `self.buffer`.
    fn allocate(&mut self, size: BufferSize, alignment: BufferAddress) -> BufferAddress {
        let alloc_start = align_to(self.offset, alignment);
        let alloc_end = alloc_start + size.get();

        assert!(alloc_end <= self.buffer.size());
        self.offset = alloc_end;
        alloc_start
    }
}

use exclusive::Exclusive;
mod exclusive {
    /// `Sync` wrapper that works by providing only exclusive access.
    ///
    /// See <https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html>
    pub(super) struct Exclusive<T>(T);

    /// Safety: `&Exclusive` has no operations.
    unsafe impl<T> Sync for Exclusive<T> {}

    impl<T> Exclusive<T> {
        pub fn new(value: T) -> Self {
            Self(value)
        }

        pub fn get_mut(&mut self) -> &mut T {
            &mut self.0
        }
    }
}