wgpu/api/
queue.rs

1use alloc::boxed::Box;
2use core::ops::{Deref, RangeBounds};
3
4use crate::{api::DeferredCommandBufferActions, *};
5
6/// Handle to a command queue on a device.
7///
8/// A `Queue` executes recorded [`CommandBuffer`] objects and provides convenience methods
9/// for writing to [buffers](Queue::write_buffer) and [textures](Queue::write_texture).
10/// It can be created along with a [`Device`] by calling [`Adapter::request_device`].
11///
12/// Corresponds to [WebGPU `GPUQueue`](https://gpuweb.github.io/gpuweb/#gpu-queue).
13#[derive(Debug, Clone)]
14pub struct Queue {
15    pub(crate) inner: dispatch::DispatchQueue,
16}
17#[cfg(send_sync)]
18static_assertions::assert_impl_all!(Queue: Send, Sync);
19
20crate::cmp::impl_eq_ord_hash_proxy!(Queue => .inner);
21
22impl Queue {
23    #[cfg(custom)]
24    /// Returns custom implementation of Queue (if custom backend and is internally T)
25    pub fn as_custom<T: custom::QueueInterface>(&self) -> Option<&T> {
26        self.inner.as_custom()
27    }
28
29    #[cfg(custom)]
30    /// Creates Queue from custom implementation
31    pub fn from_custom<T: custom::QueueInterface>(queue: T) -> Self {
32        Self {
33            inner: dispatch::DispatchQueue::custom(queue),
34        }
35    }
36}
37
/// Token identifying one particular call to [`Queue::submit`].
///
/// It can be used as part of an argument to [`Device::poll`] in order to block until
/// that particular submission has finished.
///
/// This type exists only in the Rust API of `wgpu`; the WebGPU specification has
/// no analogue.
#[derive(Clone, Debug)]
pub struct SubmissionIndex {
    /// Raw index value as returned by the underlying queue's `submit`.
    pub(crate) index: u64,
}

// Only checked when the `send_sync` cfg is set: the build fails if `SubmissionIndex`
// is not both `Send` and `Sync`.
#[cfg(send_sync)]
static_assertions::assert_impl_all!(SubmissionIndex: Send, Sync);
50
51/// Passed to [`Device::poll`] to control how and if it should block.
52pub type PollType = wgt::PollType<SubmissionIndex>;
53#[cfg(send_sync)]
54static_assertions::assert_impl_all!(PollType: Send, Sync);
55
56/// A write-only view into a staging buffer.
57///
58/// This type is what [`Queue::write_buffer_with()`] returns.
59pub struct QueueWriteBufferView {
60    queue: Queue,
61    buffer: Buffer,
62    offset: BufferAddress,
63    inner: dispatch::DispatchQueueWriteBuffer,
64}
65#[cfg(send_sync)]
66static_assertions::assert_impl_all!(QueueWriteBufferView: Send, Sync);
67
68impl QueueWriteBufferView {
69    #[cfg(custom)]
70    /// Returns custom implementation of QueueWriteBufferView (if custom backend and is internally T)
71    pub fn as_custom<T: custom::QueueWriteBufferInterface>(&self) -> Option<&T> {
72        self.inner.as_custom()
73    }
74}
75
76impl Drop for QueueWriteBufferView {
77    fn drop(&mut self) {
78        self.queue
79            .inner
80            .write_staging_buffer(&self.buffer.inner, self.offset, &self.inner);
81    }
82}
83
84/// These methods are equivalent to the methods of the same names on [`WriteOnly`].
85impl QueueWriteBufferView {
86    /// Returns the length of this view; the number of bytes to be written.
87    pub fn len(&self) -> usize {
88        self.inner.len()
89    }
90
91    /// Returns `true` if the view has a length of 0.
92    pub fn is_empty(&self) -> bool {
93        self.len() == 0
94    }
95
96    /// Returns a [`WriteOnly`] reference to a portion of this.
97    ///
98    /// `.slice(..)` can be used to access the whole data.
99    pub fn slice<'a, S: RangeBounds<usize>>(&'a mut self, bounds: S) -> WriteOnly<'a, [u8]> {
100        // SAFETY:
101        // * this is a write mapping
102        // * function signature ensures no aliasing
103        unsafe { self.inner.write_slice() }.into_slice(bounds)
104    }
105
106    /// Copies all elements from src into `self`.
107    ///
108    /// The length of `src` must be the same as `self`.
109    ///
110    /// This method is equivalent to
111    /// [`self.slice(..).copy_from_slice(src)`][WriteOnly::copy_from_slice].
112    pub fn copy_from_slice(&mut self, src: &[u8]) {
113        self.slice(..).copy_from_slice(src)
114    }
115}
116
117impl Queue {
118    /// Copies the bytes of `data` into `buffer` starting at `offset`.
119    ///
120    /// The data must be written fully in-bounds, that is, `offset + data.len() <= buffer.len()`.
121    ///
122    /// # Performance considerations
123    ///
124    /// * Calls to `write_buffer()` do *not* submit the transfer to the GPU
125    ///   immediately. They begin GPU execution only on the next call to
126    ///   [`Queue::submit()`], just before the explicitly submitted commands.
127    ///   To get a set of scheduled transfers started immediately,
128    ///   it's fine to call `submit` with no command buffers at all:
129    ///
130    ///   ```no_run
131    ///   # let queue: wgpu::Queue = todo!();
132    ///   # let buffer: wgpu::Buffer = todo!();
133    ///   # let data = [0u8];
134    ///   queue.write_buffer(&buffer, 0, &data);
135    ///   queue.submit([]);
136    ///   ```
137    ///
138    ///   However, `data` will be immediately copied into staging memory, so the
139    ///   caller may discard it any time after this call completes.
140    ///
141    /// * Consider using [`Queue::write_buffer_with()`] instead.
142    ///   That method allows you to prepare your data directly within the staging
143    ///   memory, rather than first placing it in a separate `[u8]` to be copied.
144    ///   That is, `queue.write_buffer(b, offset, data)` is approximately equivalent
145    ///   to `queue.write_buffer_with(b, offset, data.len()).copy_from_slice(data)`,
146    ///   so use `write_buffer_with()` if you can do something smarter than that
147    ///   [`copy_from_slice()`](slice::copy_from_slice). However, for small values
148    ///   (e.g. a typical uniform buffer whose contents come from a `struct`),
149    ///   there will likely be no difference, since the compiler will be able to
150    ///   optimize out unnecessary copies regardless.
151    ///
152    /// * Currently on native platforms, for both of these methods, the staging
153    ///   memory will be a new allocation. This will then be released after the
154    ///   next submission finishes. To entirely avoid short-lived allocations, you might
155    ///   be able to use [`StagingBelt`](crate::util::StagingBelt),
156    ///   or buffers you explicitly create, map, and unmap yourself.
157    pub fn write_buffer(&self, buffer: &Buffer, offset: BufferAddress, data: &[u8]) {
158        self.inner.write_buffer(&buffer.inner, offset, data);
159    }
160
161    /// Prepares to write data to a buffer via a mapped staging buffer.
162    ///
163    /// This operation allocates a temporary buffer and then returns a
164    /// [`QueueWriteBufferView`], which
165    ///
166    /// * dereferences to a `[u8]` of length `size`, and
167    /// * when dropped, schedules a copy of its contents into `buffer` at `offset`.
168    ///
169    /// Therefore, this obtains the same result as [`Queue::write_buffer()`], but may
170    /// allow you to skip one allocation and one copy of your data, if you are able to
171    /// assemble your data directly into the returned [`QueueWriteBufferView`] instead of
172    /// into a separate allocation like a [`Vec`](alloc::vec::Vec) first.
173    ///
174    /// The data must be written fully in-bounds, that is, `offset + size <= buffer.len()`.
175    ///
176    /// # Performance considerations
177    ///
178    /// * For small data not separately heap-allocated, there is no advantage of this
179    ///   over [`Queue::write_buffer()`].
180    ///
181    /// * Reading from the returned view may be slow, and will not yield the current
182    ///   contents of `buffer`. You should treat it as “write-only”.
183    ///
184    /// * Dropping the [`QueueWriteBufferView`] does *not* submit the
185    ///   transfer to the GPU immediately. The transfer begins only on the next
186    ///   call to [`Queue::submit()`] after the view is dropped, just before the
187    ///   explicitly submitted commands. To get a set of scheduled transfers started
188    ///   immediately, it's fine to call `queue.submit([])` with no command buffers at all.
189    ///
190    /// * Currently on native platforms, the staging memory will be a new allocation, which will
191    ///   then be released after the next submission finishes. To entirely avoid short-lived
192    ///   allocations, you might be able to use [`StagingBelt`](crate::util::StagingBelt),
193    ///   or buffers you explicitly create, map, and unmap yourself.
194    #[must_use]
195    pub fn write_buffer_with(
196        &self,
197        buffer: &Buffer,
198        offset: BufferAddress,
199        size: BufferSize,
200    ) -> Option<QueueWriteBufferView> {
201        profiling::scope!("Queue::write_buffer_with");
202        self.inner
203            .validate_write_buffer(&buffer.inner, offset, size)?;
204        let staging_buffer = self.inner.create_staging_buffer(size)?;
205        Some(QueueWriteBufferView {
206            queue: self.clone(),
207            buffer: buffer.clone(),
208            offset,
209            inner: staging_buffer,
210        })
211    }
212
213    /// Copies the bytes of `data` into a texture.
214    ///
215    /// * `data` contains the texels to be written, which must be in
216    ///   [the same format as the texture](TextureFormat).
217    /// * `data_layout` describes the memory layout of `data`, which does not necessarily
218    ///   have to have tightly packed rows.
219    /// * `texture` specifies the texture to write into, and the location within the
220    ///   texture (coordinate offset, mip level) that will be overwritten.
221    /// * `size` is the size, in texels, of the region to be written.
222    ///
223    /// This method fails if `size` overruns the size of `texture`, or if `data` is too short.
224    ///
225    /// # Performance considerations
226    ///
227    /// This operation has the same performance considerations as [`Queue::write_buffer()`];
228    /// see its documentation for details.
229    ///
230    /// However, since there is no “mapped texture” like a mapped buffer,
231    /// alternate techniques for writing to textures will generally consist of first copying
232    /// the data to a buffer, then using [`CommandEncoder::copy_buffer_to_texture()`], or in
233    /// some cases a compute shader, to copy texels from that buffer to the texture.
234    pub fn write_texture(
235        &self,
236        texture: TexelCopyTextureInfo<'_>,
237        data: &[u8],
238        data_layout: TexelCopyBufferLayout,
239        size: Extent3d,
240    ) {
241        self.inner.write_texture(texture, data, data_layout, size);
242    }
243
244    /// Schedule a copy of data from `image` into `texture`.
245    #[cfg(web)]
246    pub fn copy_external_image_to_texture(
247        &self,
248        source: &wgt::CopyExternalImageSourceInfo,
249        dest: wgt::CopyExternalImageDestInfo<&api::Texture>,
250        size: Extent3d,
251    ) {
252        self.inner
253            .copy_external_image_to_texture(source, dest, size);
254    }
255
256    /// Submits a series of finished command buffers for execution.
257    pub fn submit<I: IntoIterator<Item = CommandBuffer>>(
258        &self,
259        command_buffers: I,
260    ) -> SubmissionIndex {
261        // As submit drains the iterator (even on error), collect deferred actions
262        // from each CommandBuffer along the way.
263        let mut actions = DeferredCommandBufferActions::default();
264
265        let mut command_buffers = command_buffers.into_iter().map(|comb| {
266            actions.append(&mut comb.actions.lock());
267            comb.buffer
268        });
269        let index = self.inner.submit(&mut command_buffers);
270
271        // Execute all deferred actions after submit.
272        actions.execute(&self.inner);
273
274        SubmissionIndex { index }
275    }
276
277    /// Gets the amount of nanoseconds each tick of a timestamp query represents.
278    ///
279    /// Returns zero if timestamp queries are unsupported.
280    ///
281    /// Timestamp values are represented in nanosecond values on WebGPU, see <https://gpuweb.github.io/gpuweb/#timestamp>
282    /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required.
283    pub fn get_timestamp_period(&self) -> f32 {
284        self.inner.get_timestamp_period()
285    }
286
287    /// Registers a callback that is invoked when the previous [`Queue::submit`] finishes executing
288    /// on the GPU. When this callback runs, all mapped-buffer callbacks registered for the same
289    /// submission are guaranteed to have been called.
290    ///
291    /// For the callback to run, either [`queue.submit(..)`][q::s], [`instance.poll_all(..)`][i::p_a],
292    /// or [`device.poll(..)`][d::p] must be called elsewhere in the runtime, possibly integrated into
293    /// an event loop or run on a separate thread.
294    ///
295    /// The callback runs on the thread that first calls one of the above functions after the GPU work
296    /// completes. There are no restrictions on the code you can run in the callback; however, on native
297    /// the polling call will not return until the callback finishes, so keep callbacks short (set flags,
298    /// send messages, etc.).
299    ///
300    /// [q::s]: Queue::submit
301    /// [i::p_a]: Instance::poll_all
302    /// [d::p]: Device::poll
303    pub fn on_submitted_work_done(&self, callback: impl FnOnce() + Send + 'static) {
304        self.inner.on_submitted_work_done(Box::new(callback));
305    }
306
307    /// Get the [`wgpu_hal`] device from this `Queue`.
308    ///
309    /// Find the Api struct corresponding to the active backend in [`wgpu_hal::api`],
310    /// and pass that struct to the to the `A` type parameter.
311    ///
312    /// Returns a guard that dereferences to the type of the hal backend
313    /// which implements [`A::Queue`].
314    ///
315    /// # Types
316    ///
317    /// The returned type depends on the backend:
318    ///
319    #[doc = crate::macros::hal_type_vulkan!("Queue")]
320    #[doc = crate::macros::hal_type_metal!("Queue")]
321    #[doc = crate::macros::hal_type_dx12!("Queue")]
322    #[doc = crate::macros::hal_type_gles!("Queue")]
323    ///
324    /// # Errors
325    ///
326    /// This method will return None if:
327    /// - The queue is not from the backend specified by `A`.
328    /// - The queue is from the `webgpu` or `custom` backend.
329    ///
330    /// # Safety
331    ///
332    /// - The returned resource must not be destroyed unless the guard
333    ///   is the last reference to it and it is not in use by the GPU.
334    ///   The guard and handle may be dropped at any time however.
335    /// - All the safety requirements of wgpu-hal must be upheld.
336    ///
337    /// [`A::Queue`]: hal::Api::Queue
338    #[cfg(wgpu_core)]
339    pub unsafe fn as_hal<A: hal::Api>(
340        &self,
341    ) -> Option<impl Deref<Target = A::Queue> + WasmNotSendSync> {
342        let queue = self.inner.as_core_opt()?;
343        unsafe { queue.context.queue_as_hal::<A>(queue) }
344    }
345
346    /// Compact a BLAS, it must have had [`Blas::prepare_compaction_async`] called on it and had the
347    /// callback provided called.
348    ///
349    /// The returned BLAS is more restricted than a normal BLAS because it may not be rebuilt or
350    /// compacted.
351    pub fn compact_blas(&self, blas: &Blas) -> Blas {
352        let (handle, dispatch) = self.inner.compact_blas(&blas.inner);
353        Blas {
354            handle,
355            inner: dispatch,
356        }
357    }
358}