wgpu/api/queue.rs
use alloc::boxed::Box;
use core::ops::{Deref, DerefMut};

use crate::*;

/// Handle to a command queue on a device.
///
/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
///
/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
#[derive(Debug, Clone)]
pub struct Queue {
    pub(crate) inner: dispatch::DispatchQueue,
}
#[cfg(send_sync)]
static_assertions::assert_impl_all!(Queue: Send, Sync);

crate::cmp::impl_eq_ord_hash_proxy!(Queue => .inner);

impl Queue {
    #[cfg(custom)]
    /// Returns the custom implementation of `Queue` (if the backend is custom and its inner type is `T`).
    pub fn as_custom<T: custom::QueueInterface>(&self) -> Option<&T> {
        self.inner.as_custom()
    }

    #[cfg(custom)]
    /// Creates a `Queue` from a custom implementation.
    pub fn from_custom<T: custom::QueueInterface>(queue: T) -> Self {
        Self {
            inner: dispatch::DispatchQueue::custom(queue),
        }
    }
}

/// Identifier for a particular call to [`Queue::submit`]. Can be used
/// as part of an argument to [`Device::poll`] to block for a particular
/// submission to finish.
///
/// This type is unique to the Rust API of `wgpu`.
/// There is no analogue in the WebGPU specification.
#[derive(Debug, Clone)]
pub struct SubmissionIndex {
    pub(crate) index: u64,
}
#[cfg(send_sync)]
static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync);

/// Passed to [`Device::poll`] to control how and if it should block.
pub type PollType = wgt::PollType<SubmissionIndex>;
#[cfg(send_sync)]
static_assertions::assert_impl_all!(PollType: Send, Sync);

/// A write-only view into a staging buffer.
///
/// Reading from this buffer won't yield the contents of the buffer from the
/// GPU and is likely to be slow. Because of this, although [`AsMut`] is
/// implemented for this type, [`AsRef`] is not.
pub struct QueueWriteBufferView<'a> {
    queue: &'a Queue,
    buffer: &'a Buffer,
    offset: BufferAddress,
    inner: dispatch::DispatchQueueWriteBuffer,
}
#[cfg(send_sync)]
static_assertions::assert_impl_all!(QueueWriteBufferView<'_>: Send, Sync);

impl QueueWriteBufferView<'_> {
    #[cfg(custom)]
    /// Returns the custom implementation of `QueueWriteBufferView` (if the backend is custom and its inner type is `T`).
    pub fn as_custom<T: custom::QueueWriteBufferInterface>(&self) -> Option<&T> {
        self.inner.as_custom()
    }
}

impl Deref for QueueWriteBufferView<'_> {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        log::warn!("Reading from a QueueWriteBufferView won't yield the contents of the buffer and may be slow.");
        self.inner.slice()
    }
}

impl DerefMut for QueueWriteBufferView<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.inner.slice_mut()
    }
}

impl AsMut<[u8]> for QueueWriteBufferView<'_> {
    fn as_mut(&mut self) -> &mut [u8] {
        self.inner.slice_mut()
    }
}

impl Drop for QueueWriteBufferView<'_> {
    fn drop(&mut self) {
        self.queue
            .inner
            .write_staging_buffer(&self.buffer.inner, self.offset, &self.inner);
    }
}

impl Queue {
    /// Copies the bytes of `data` into `buffer` starting at `offset`.
    ///
    /// The data must be written fully in-bounds, that is, `offset + data.len() <= buffer.len()`.
    ///
    /// # Performance considerations
    ///
    /// * Calls to `write_buffer()` do *not* submit the transfer to the GPU
    ///   immediately. They begin GPU execution only on the next call to
    ///   [`Queue::submit()`], just before the explicitly submitted commands.
    ///   To get a set of scheduled transfers started immediately,
    ///   it's fine to call `submit` with no command buffers at all:
    ///
    ///   ```no_run
    ///   # let queue: wgpu::Queue = todo!();
    ///   # let buffer: wgpu::Buffer = todo!();
    ///   # let data = [0u8];
    ///   queue.write_buffer(&buffer, 0, &data);
    ///   queue.submit([]);
    ///   ```
    ///
    ///   However, `data` will be immediately copied into staging memory, so the
    ///   caller may discard it any time after this call completes.
    ///
    /// * Consider using [`Queue::write_buffer_with()`] instead.
    ///   That method allows you to prepare your data directly within the staging
    ///   memory, rather than first placing it in a separate `[u8]` to be copied.
    ///   That is, `queue.write_buffer(b, offset, data)` is approximately equivalent
    ///   to `queue.write_buffer_with(b, offset, data.len()).copy_from_slice(data)`,
    ///   so use `write_buffer_with()` if you can do something smarter than that
    ///   [`copy_from_slice()`](slice::copy_from_slice). However, for small values
    ///   (e.g. a typical uniform buffer whose contents come from a `struct`),
    ///   there will likely be no difference, since the compiler will be able to
    ///   optimize out unnecessary copies regardless.
    ///
    /// * Currently on native platforms, for both of these methods, the staging
    ///   memory will be a new allocation. This will then be released after the
    ///   next submission finishes. To entirely avoid short-lived allocations, you might
    ///   be able to use [`StagingBelt`](crate::util::StagingBelt),
    ///   or buffers you explicitly create, map, and unmap yourself.
    pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
        self.inner.write_buffer(&buffer.inner, offset, data);
    }

    /// Prepares to write data to a buffer via a mapped staging buffer.
    ///
    /// This operation allocates a temporary buffer and then returns a
    /// [`QueueWriteBufferView`], which
    ///
    /// * dereferences to a `[u8]` of length `size`, and
    /// * when dropped, schedules a copy of its contents into `buffer` at `offset`.
    ///
    /// Therefore, this obtains the same result as [`Queue::write_buffer()`], but may
    /// allow you to skip one allocation and one copy of your data, if you are able to
    /// assemble your data directly into the returned [`QueueWriteBufferView`] instead of
    /// into a separate allocation like a [`Vec`](alloc::vec::Vec) first.
    ///
    /// The data must be written fully in-bounds, that is, `offset + size <= buffer.len()`.
    ///
    /// # Performance considerations
    ///
    /// * For small data not separately heap-allocated, there is no advantage of this
    ///   over [`Queue::write_buffer()`].
    ///
    /// * Reading from the returned view may be slow, and will not yield the current
    ///   contents of `buffer`. You should treat it as “write-only”.
    ///
    /// * Dropping the [`QueueWriteBufferView`] does *not* submit the
    ///   transfer to the GPU immediately. The transfer begins only on the next
    ///   call to [`Queue::submit()`] after the view is dropped, just before the
    ///   explicitly submitted commands. To get a set of scheduled transfers started
    ///   immediately, it's fine to call `queue.submit([])` with no command buffers at all.
    ///
    /// * Currently on native platforms, the staging memory will be a new allocation, which will
    ///   then be released after the next submission finishes. To entirely avoid short-lived
    ///   allocations, you might be able to use [`StagingBelt`](crate::util::StagingBelt),
    ///   or buffers you explicitly create, map, and unmap yourself.
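    ///
    /// A minimal usage sketch (the `queue` and `buffer` handles, the offset, and the data
    /// contents here are placeholders for whatever your application actually uses):
    ///
    /// ```no_run
    /// # let queue: wgpu::Queue = todo!();
    /// # let buffer: wgpu::Buffer = todo!();
    /// let data = [0u8; 16];
    /// let size = wgpu::BufferSize::new(data.len() as u64).unwrap();
    /// if let Some(mut view) = queue.write_buffer_with(&buffer, 0, size) {
    ///     view.copy_from_slice(&data);
    /// } // dropping the view schedules the copy into `buffer`
    /// queue.submit([]);
    /// ```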
    #[must_use]
    pub fn write_buffer_with<'a>(
        &'a self,
        buffer: &'a Buffer,
        offset: BufferAddress,
        size: BufferSize,
    ) -> Option<QueueWriteBufferView<'a>> {
        profiling::scope!("Queue::write_buffer_with");
        self.inner
            .validate_write_buffer(&buffer.inner, offset, size)?;
        let staging_buffer = self.inner.create_staging_buffer(size)?;
        Some(QueueWriteBufferView {
            queue: self,
            buffer,
            offset,
            inner: staging_buffer,
        })
    }

    /// Copies the bytes of `data` into a texture.
    ///
    /// * `data` contains the texels to be written, which must be in
    ///   [the same format as the texture](TextureFormat).
    /// * `data_layout` describes the memory layout of `data`, which does not necessarily
    ///   have to have tightly packed rows.
    /// * `texture` specifies the texture to write into, and the location within the
    ///   texture (coordinate offset, mip level) that will be overwritten.
    /// * `size` is the size, in texels, of the region to be written.
    ///
    /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
    ///
    /// # Performance considerations
    ///
    /// This operation has the same performance considerations as [`Queue::write_buffer()`];
    /// see its documentation for details.
    ///
    /// However, since there is no “mapped texture” like a mapped buffer,
    /// alternate techniques for writing to textures will generally consist of first copying
    /// the data to a buffer, then using [`CommandEncoder::copy_buffer_to_texture()`], or in
    /// some cases a compute shader, to copy texels from that buffer to the texture.
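    ///
    /// A minimal sketch, assuming `texture` is a 2D `TextureFormat::Rgba8Unorm` texture at
    /// least 4×1 texels in size; all of the concrete values here are placeholders:
    ///
    /// ```no_run
    /// # let queue: wgpu::Queue = todo!();
    /// # let texture: wgpu::Texture = todo!();
    /// // One row of 4 RGBA8 texels, 4 bytes per texel.
    /// let data = [255u8; 4 * 4];
    /// queue.write_texture(
    ///     texture.as_image_copy(),
    ///     &data,
    ///     wgpu::TexelCopyBufferLayout {
    ///         offset: 0,
    ///         bytes_per_row: Some(4 * 4),
    ///         rows_per_image: None,
    ///     },
    ///     wgpu::Extent3d {
    ///         width: 4,
    ///         height: 1,
    ///         depth_or_array_layers: 1,
    ///     },
    /// );
    /// ```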
    pub fn write_texture(
        &self,
        texture: TexelCopyTextureInfo<'_>,
        data: &[u8],
        data_layout: TexelCopyBufferLayout,
        size: Extent3d,
    ) {
        self.inner.write_texture(texture, data, data_layout, size);
    }

    /// Schedules a copy of data from the external image `source` into the texture `dest`.
    #[cfg(web)]
    pub fn copy_external_image_to_texture(
        &self,
        source: &wgt::CopyExternalImageSourceInfo,
        dest: wgt::CopyExternalImageDestInfo<&api::Texture>,
        size: Extent3d,
    ) {
        self.inner
            .copy_external_image_to_texture(source, dest, size);
    }

    /// Submits a series of finished command buffers for execution.
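    ///
    /// A minimal sketch of recording and submitting a command buffer (the `device` and
    /// `queue` handles are assumed to exist already):
    ///
    /// ```no_run
    /// # let device: wgpu::Device = todo!();
    /// # let queue: wgpu::Queue = todo!();
    /// let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    /// // ... record passes and copies on `encoder` here ...
    /// let _submission_index = queue.submit([encoder.finish()]);
    /// ```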
    pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
        &self,
        command_buffers: I,
    ) -> SubmissionIndex {
        let mut command_buffers = command_buffers.into_iter().map(|comb| comb.buffer);

        let index = self.inner.submit(&mut command_buffers);

        SubmissionIndex { index }
    }

    /// Gets the number of nanoseconds each tick of a timestamp query represents.
    ///
    /// Returns zero if timestamp queries are unsupported.
    ///
    /// Timestamp values are represented in nanoseconds on WebGPU; see
    /// <https://gpuweb.github.io/gpuweb/#timestamp>. Therefore, this is always 1.0 on the
    /// web, but on wgpu-core a manual conversion is required.
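    ///
    /// A minimal sketch of how the period is typically used: converting raw timestamp-query
    /// ticks (the `start_ticks`/`end_ticks` values here are hypothetical) into nanoseconds.
    ///
    /// ```no_run
    /// # let queue: wgpu::Queue = todo!();
    /// # let (start_ticks, end_ticks): (u64, u64) = todo!();
    /// let elapsed_ns = (end_ticks - start_ticks) as f64 * queue.get_timestamp_period() as f64;
    /// ```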
    pub fn get_timestamp_period(&self) -> f32 {
        self.inner.get_timestamp_period()
    }

    /// Registers a callback that is invoked once the previous call to [`Queue::submit`]
    /// finishes running on the GPU. The callback being called implies that all mapped
    /// buffer callbacks which were registered before this call will have been called.
    ///
    /// For the callback to fire, either `queue.submit(..)`, `instance.poll_all(..)`, or
    /// `device.poll(..)` must be called elsewhere in the runtime, possibly integrated
    /// into an event loop or run on a separate thread.
    ///
    /// The callback will be called on the thread that first calls one of the above
    /// functions after the GPU work has completed. There are no restrictions on the code
    /// you can run in the callback; however, on native, the polling call will not return
    /// until the callback does, so prefer keeping callbacks short and using them to set
    /// flags, send messages, etc.
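    ///
    /// A minimal sketch of the flag/message pattern described above (the `queue` handle is
    /// assumed to exist already, and the device must still be polled elsewhere):
    ///
    /// ```no_run
    /// # let queue: wgpu::Queue = todo!();
    /// let (sender, receiver) = std::sync::mpsc::channel::<()>();
    /// queue.on_submitted_work_done(move || {
    ///     // Keep the callback short: just signal completion.
    ///     let _ = sender.send(());
    /// });
    /// queue.submit([]);
    /// // Once the device has been polled and the work has finished, this receives `()`:
    /// // receiver.recv().unwrap();
    /// ```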
    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
        self.inner.on_submitted_work_done(Box::new(callback));
    }

    /// Get the [`wgpu_hal`] queue from this `Queue`.
    ///
    /// Find the Api struct corresponding to the active backend in [`wgpu_hal::api`],
    /// and pass that struct to the `A` type parameter.
    ///
    /// Returns a guard that dereferences to the hal backend's queue type,
    /// [`A::Queue`].
    ///
    /// # Types
    ///
    /// The returned type depends on the backend:
    ///
    #[doc = crate::hal_type_vulkan!("Queue")]
    #[doc = crate::hal_type_metal!("Queue")]
    #[doc = crate::hal_type_dx12!("Queue")]
    #[doc = crate::hal_type_gles!("Queue")]
    ///
    /// # Errors
    ///
    /// This method will return `None` if:
    /// - The queue is not from the backend specified by `A`.
    /// - The queue is from the `webgpu` or `custom` backend.
    ///
    /// # Safety
    ///
    /// - The returned resource must not be destroyed unless the guard
    ///   is the last reference to it and it is not in use by the GPU.
    ///   However, the guard and handle may be dropped at any time.
    /// - All the safety requirements of wgpu-hal must be upheld.
    ///
    /// [`A::Queue`]: hal::Api::Queue
    #[cfg(wgpu_core)]
    pub unsafe fn as_hal<A: hal::Api>(
        &self,
    ) -> Option<impl Deref<Target = A::Queue> + WasmNotSendSync> {
        let queue = self.inner.as_core_opt()?;
        unsafe { queue.context.queue_as_hal::<A>(queue) }
    }

    /// Compacts a BLAS. [`Blas::prepare_compaction_async`] must have been called on it, and
    /// the callback provided to that call must already have run.
    ///
    /// The returned BLAS is more restricted than a normal BLAS because it may not be rebuilt or
    /// compacted.
    pub fn compact_blas(&self, blas: &Blas) -> Blas {
        let (handle, dispatch) = self.inner.compact_blas(&blas.inner);
        Blas {
            handle,
            inner: dispatch,
        }
    }
}
334}