wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Number of nanoseconds in one millisecond.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Maximum total attachment count: twice `crate::MAX_COLOR_ATTACHMENTS` plus one.
///
/// NOTE(review): presumably every color attachment plus its resolve target,
/// plus a single depth-stencil attachment — confirm against the users.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Marker type for the Vulkan backend.
///
/// It carries no data; its [`crate::Api`] implementation binds the
/// Vulkan-specific resource types declared in this module.
#[derive(Clone, Debug)]
pub struct Api;
57
/// Binds every associated item of [`crate::Api`] to its Vulkan counterpart
/// declared in this module.
impl crate::Api for Api {
    /// Identifies this backend as Vulkan.
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    // Instance, surface, adapter and device.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Queue and command recording.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding model, shaders and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
87
// Invoke `crate::impl_dyn_resource!` for every Vulkan resource type listed below.
//
// NOTE(review): the macro is defined outside this file; presumably it
// implements the dynamic-dispatch resource trait for each type — confirm.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
112
/// State kept for a debug utils messenger created on an instance.
struct DebugUtils {
    /// Instance-level function table of the debug utils extension.
    extension: ext::debug_utils::Instance,
    /// Handle of the debug messenger.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Held only to keep the allocation alive, hence the `dead_code` allowance.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
125
/// Parameters describing a debug utils messenger to create.
///
/// NOTE(review): the consumer is outside this view; presumably the instance
/// creation code — confirm.
pub struct DebugUtilsCreateInfo {
    /// Message severity flags for the messenger.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message type flags for the messenger.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// Boxed user data for the messenger callback.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
131
#[derive(Debug)]
/// Properties of the validation layer that the debug utils messenger
/// needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
142
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, or `None` if the
    /// layer is not present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// its layer, so there is no reason to record the version.
    has_obs_layer: bool,
}
156
/// Instance state shared behind an `Arc` by [`Instance`], [`Surface`],
/// [`Adapter`] and `DeviceShared`.
pub struct InstanceShared {
    /// The `ash` instance wrapper.
    raw: ash::Instance,
    /// Instance extension names.
    ///
    /// NOTE(review): presumably the extensions enabled at creation — confirm.
    extensions: Vec<&'static CStr>,
    /// NOTE(review): presumably set when the raw instance is owned
    /// externally, mirroring `DeviceShared::drop_guard` — confirm in
    /// `InstanceShared::drop`.
    drop_guard: Option<crate::DropGuard>,
    /// Instance flags.
    flags: wgt::InstanceFlags,
    /// Memory budget thresholds.
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if one exists.
    debug_utils: Option<DebugUtils>,
    /// Function table of `VK_KHR_get_physical_device_properties2`, if loaded.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry point loader.
    entry: ash::Entry,
    /// NOTE(review): presumably whether an NVIDIA Optimus setup was
    /// detected — confirm at the place this is computed.
    has_nv_optimus: bool,
    /// Android SDK version.
    ///
    /// NOTE(review): the value used on non-Android targets is not visible here.
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
176
/// The Vulkan implementation of `crate::Api::Instance`.
///
/// A thin handle around the reference-counted [`InstanceShared`].
pub struct Instance {
    /// Shared instance state.
    shared: Arc<InstanceShared>,
}
180
/// Semaphore used to acquire a swapchain image, together with the
/// bookkeeping needed to wait on it exactly once and to reuse it safely.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
229
230impl SwapchainAcquireSemaphore {
231    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
232        Ok(Self {
233            acquire: device
234                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
235            should_wait_for_acquire: true,
236            previously_used_submission_index: 0,
237        })
238    }
239
240    /// Sets the fence value which the next acquire will wait for. This prevents
241    /// the semaphore from being used while the previous submission is still in flight.
242    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
243        self.previously_used_submission_index = value;
244    }
245
246    /// Return the semaphore that commands drawing to this image should wait for, if any.
247    ///
248    /// This only returns `Some` once per acquisition; see
249    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
250    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
251        if self.should_wait_for_acquire {
252            self.should_wait_for_acquire = false;
253            Some(self.acquire)
254        } else {
255            None
256        }
257    }
258
259    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
260    /// so reset internal state to be ready for the next frame.
261    fn end_semaphore_usage(&mut self) {
262        // Reset the acquire semaphore, so that the next time we acquire this
263        // image, we can wait for it again.
264        self.should_wait_for_acquire = true;
265    }
266
267    unsafe fn destroy(&self, device: &ash::Device) {
268        unsafe {
269            device.destroy_semaphore(self.acquire, None);
270        }
271    }
272}
273
/// Per-image pool of semaphores that order presentation after the
/// submissions that draw to the image.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores at the front of [`present`] that have been
    /// handed out to submissions since the last reset.
    ///
    /// It is also the index of the next semaphore to hand out.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}
320
321impl SwapchainPresentSemaphores {
322    pub fn new(frame_index: usize) -> Self {
323        Self {
324            present: Vec::new(),
325            present_index: 0,
326            frame_index,
327        }
328    }
329
330    /// Return the semaphore that the next submission that writes to this image should
331    /// signal when it's done.
332    ///
333    /// See [`SwapchainPresentSemaphores::present`] for details.
334    fn get_submit_signal_semaphore(
335        &mut self,
336        device: &DeviceShared,
337    ) -> Result<vk::Semaphore, crate::DeviceError> {
338        // Try to recycle a semaphore we created for a previous presentation.
339        let sem = match self.present.get(self.present_index) {
340            Some(sem) => *sem,
341            None => {
342                let sem = device.new_binary_semaphore(&format!(
343                    "SwapchainImageSemaphore: Image {} present semaphore {}",
344                    self.frame_index, self.present_index
345                ))?;
346                self.present.push(sem);
347                sem
348            }
349        };
350
351        self.present_index += 1;
352
353        Ok(sem)
354    }
355
356    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
357    /// so reset internal state to be ready for the next frame.
358    fn end_semaphore_usage(&mut self) {
359        // Reset the index to 0, so that the next time we get a semaphore, we
360        // start from the beginning of the list.
361        self.present_index = 0;
362    }
363
364    /// Return the semaphores that a presentation of this image should wait on.
365    ///
366    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
367    /// ends this image's acquisition should wait for. See
368    /// [`SwapchainPresentSemaphores::present`] for details.
369    ///
370    /// Reset `self` to be ready for the next acquisition cycle.
371    ///
372    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
373    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
374        self.present[0..self.present_index].to_vec()
375    }
376
377    unsafe fn destroy(&self, device: &ash::Device) {
378        unsafe {
379            for sem in &self.present {
380                device.destroy_semaphore(*sem, None);
381            }
382        }
383    }
384}
385
/// A configured swapchain together with the synchronization state used to
/// acquire and present its images.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Device-level function table of the swapchain extension.
    functor: khr::swapchain::Device,
    /// The shared device state this swapchain holds on to.
    device: Arc<DeviceShared>,
    /// The swapchain images.
    images: Vec<vk::Image>,
    /// Surface configuration.
    ///
    /// NOTE(review): presumably the configuration this swapchain was created
    /// with — confirm.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
438
439impl Swapchain {
440    /// Mark the current frame finished, advancing to the next acquire semaphore.
441    fn advance_acquire_semaphore(&mut self) {
442        let semaphore_count = self.acquire_semaphores.len();
443        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
444    }
445
446    /// Get the next acquire semaphore that should be used with this swapchain.
447    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
448        self.acquire_semaphores[self.next_acquire_index].clone()
449    }
450
451    /// Get the set of present semaphores that should be used with the given image index.
452    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
453        self.present_semaphores[index as usize].clone()
454    }
455}
456
/// The Vulkan implementation of `crate::Api::Surface`.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Instance-level function table of the surface extension.
    functor: khr::surface::Instance,
    /// The shared instance state this surface holds on to.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Swapchain>>,
}
463
464impl Surface {
465    /// Get the raw Vulkan swapchain associated with this surface.
466    ///
467    /// Returns [`None`] if the surface is not configured.
468    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
469        let read = self.swapchain.read();
470        read.as_ref().map(|it| it.raw)
471    }
472
473    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
474    /// using [VK_GOOGLE_display_timing].
475    ///
476    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
477    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
478    ///
479    /// This can also be used to add a "not before" timestamp to the presentation.
480    ///
481    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
482    ///
483    /// # Panics
484    ///
485    /// - If the surface hasn't been configured.
486    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
487    ///
488    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
489    #[track_caller]
490    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
491        let mut swapchain = self.swapchain.write();
492        let swapchain = swapchain
493            .as_mut()
494            .expect("Surface should have been configured");
495        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
496        if swapchain.device.features.contains(features) {
497            swapchain.next_present_time = Some(present_timing);
498        } else {
499            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
500            panic!(
501                concat!(
502                    "Tried to set display timing properties ",
503                    "without the corresponding feature ({:?}) enabled."
504                ),
505                features
506            );
507        }
508    }
509}
510
/// The Vulkan implementation of `crate::Api::SurfaceTexture`: a texture
/// together with the semaphore state associated with it.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// NOTE(review): presumably the swapchain image index returned by
    /// `vkAcquireNextImageKHR` — confirm at the acquisition site.
    index: u32,
    /// The texture; exposed through the `Borrow` implementations of this type.
    texture: Texture,
    /// Shared handle to acquire-semaphore state (see [`SwapchainAcquireSemaphore`]).
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Shared handle to a present-semaphore set (see [`SwapchainPresentSemaphores`]).
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
518
// `crate::DynSurfaceTexture` is implemented here with an empty body.
impl crate::DynSurfaceTexture for SurfaceTexture {}
520
521impl Borrow<Texture> for SurfaceTexture {
522    fn borrow(&self) -> &Texture {
523        &self.texture
524    }
525}
526
527impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
528    fn borrow(&self) -> &dyn crate::DynTexture {
529        &self.texture
530    }
531}
532
/// The Vulkan implementation of `crate::Api::Adapter`: a physical device
/// plus the information gathered about it.
pub struct Adapter {
    /// The raw physical device handle.
    raw: vk::PhysicalDevice,
    /// The shared instance state this adapter holds on to.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// NOTE(review): presumably the memory property flags this backend knows
    /// how to handle — confirm.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Physical device properties, as defined in the `adapter` module.
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Physical device features, as defined in the `adapter` module.
    phd_features: PhysicalDeviceFeatures,
    /// Downlevel flags.
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Workaround flags; see [`Workarounds`].
    workarounds: Workarounds,
}
544
// TODO: there's no reason why this can't be unified, since the function
// pointers should all be the same; it's just not clear how to do this with `ash`.
/// Where the functions of an extension are found: either in a loaded
/// extension function table of type `T`, or in core Vulkan.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
552
/// Optional device-level extension function tables.
///
/// NOTE(review): `None` presumably means the extension is unavailable or was
/// not enabled — confirm at the construction site.
struct DeviceExtensionFunctions {
    /// Debug utils device functions.
    debug_utils: Option<ext::debug_utils::Device>,
    /// Draw-indirect-count device functions.
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Timeline semaphore functions, either from the extension or promoted
    /// to core (see [`ExtensionFn`]).
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    /// Function tables used for ray tracing.
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    /// Mesh shader device functions.
    mesh_shading: Option<ext::mesh_shader::Device>,
}
560
/// The device-level extension function tables grouped under
/// `DeviceExtensionFunctions::ray_tracing`.
struct RayTracingDeviceExtensionFunctions {
    /// Acceleration structure device functions.
    acceleration_structure: khr::acceleration_structure::Device,
    /// Buffer device address functions.
    buffer_device_address: khr::buffer_device_address::Device,
}
565
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// NOTE(review): presumably whether image views may specify their own
    /// usage flags — confirm where this is computed.
    image_view_usage: bool,
    /// Whether timeline semaphores are available. Per the module docs, they
    /// are then used 1:1 with wgpu-hal fences.
    timeline_semaphores: bool,
    /// NOTE(review): presumably whether a 24-bit depth format is supported — confirm.
    texture_d24: bool,
    /// NOTE(review): presumably whether a 24-bit depth + 8-bit stencil format
    /// is supported — confirm.
    texture_d24_s8: bool,
    /// NOTE(review): presumably whether an 8-bit stencil-only format is
    /// supported — confirm.
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// NOTE(review): presumably an alignment mask applied to ranges of
    /// non-coherent mapped memory — confirm.
    non_coherent_map_mask: wgt::BufferAddress,
    /// NOTE(review): presumably whether multi-draw-indirect is supported — confirm.
    multi_draw_indirect: bool,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// NOTE(review): presumably the image counterpart of
    /// `robust_buffer_access` — confirm.
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// NOTE(review): presumably the image counterpart of
    /// `robust_buffer_access2` — confirm.
    robust_image_access2: bool,
    /// NOTE(review): presumably whether the implementation can zero-initialize
    /// workgroup memory itself — confirm.
    zero_initialize_workgroup_memory: bool,
    /// NOTE(review): presumably whether image format lists can be supplied at
    /// image creation — confirm.
    image_format_list: bool,
    /// NOTE(review): presumably the maximum number of sampler objects the
    /// device allows — confirm.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
637
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// A set of these is stored as `workarounds` on both `Adapter` and
    /// `DeviceShared`.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
671
/// Hashable description of one attachment: its format, layout and ops.
///
/// Used as a building block of [`ColorAttachmentKey`] and
/// [`DepthStencilAttachmentKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    /// Attachment format.
    format: vk::Format,
    /// Attachment image layout.
    layout: vk::ImageLayout,
    /// Attachment operations.
    ops: crate::AttachmentOps,
}
678
679impl AttachmentKey {
680    /// Returns an attachment key for a compatible attachment.
681    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
682        Self {
683            format,
684            layout,
685            ops: crate::AttachmentOps::all(),
686        }
687    }
688}
689
/// Hashable description of a color attachment and its optional resolve
/// attachment; an element of `RenderPassKey::colors`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if any.
    resolve: Option<AttachmentKey>,
}
695
/// Hashable description of a depth-stencil attachment; used as
/// `RenderPassKey::depth_stencil`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// Format, layout and ops of the attachment.
    ///
    /// NOTE(review): `base.ops` presumably holds the depth ops, given the
    /// separate `stencil_ops` field — confirm.
    base: AttachmentKey,
    /// Operations for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
701
/// Key of the render pass cache kept in `DeviceShared::render_passes`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// Color attachment slots, at most `crate::MAX_COLOR_ATTACHMENTS`; a slot
    /// may be `None`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// The depth-stencil attachment, if any.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    /// Sample count.
    sample_count: u32,
    /// Multiview setting, if any.
    ///
    /// NOTE(review): presumably the number of views — confirm.
    multiview: Option<NonZeroU32>,
}
709
/// Device state shared behind an `Arc` by [`Device`], `Swapchain` and other
/// objects of this backend.
struct DeviceShared {
    /// The `ash` device wrapper.
    raw: ash::Device,
    /// Queue family index.
    ///
    /// NOTE(review): presumably the family of `raw_queue` — confirm.
    family_index: u32,
    /// Queue index.
    ///
    /// NOTE(review): presumably the index of `raw_queue` within its family — confirm.
    queue_index: u32,
    /// A raw queue handle.
    raw_queue: vk::Queue,
    /// When this is `None`, dropping `DeviceShared` destroys the raw device;
    /// when it is `Some`, the raw device is left alone.
    drop_guard: Option<crate::DropGuard>,
    /// The shared instance state this device holds on to.
    instance: Arc<InstanceShared>,
    /// The raw physical device handle.
    physical_device: vk::PhysicalDevice,
    /// Names of the enabled device extensions.
    enabled_extensions: Vec<&'static CStr>,
    /// Optional extension function tables.
    extension_fns: DeviceExtensionFunctions,
    /// Vendor id.
    ///
    /// NOTE(review): presumably the physical device's PCI vendor id — confirm.
    vendor_id: u32,
    /// 16-byte key for validating pipeline cache data.
    ///
    /// NOTE(review): how the key is derived is not visible here.
    pipeline_cache_validation_key: [u8; 16],
    /// Timestamp period.
    ///
    /// NOTE(review): presumably nanoseconds per timestamp tick — confirm.
    timestamp_period: f32,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Workaround flags; see [`Workarounds`].
    workarounds: Workarounds,
    /// Device features; checked by `Surface::set_next_present_time`.
    features: wgt::Features,
    /// Cache of render passes by key. Per the module docs they are kept
    /// forever; they are destroyed when `DeviceShared` is dropped.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    /// Sampler cache, as defined in the `sampler` module.
    sampler_cache: Mutex<sampler::SamplerCache>,
    /// Counter for memory allocations.
    memory_allocations_counter: InternalCounter,

    /// Because cached framebuffers are not deleted until the device is
    /// destroyed, if the Vulkan implementation re-uses handles we need some
    /// way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
738
739impl Drop for DeviceShared {
740    fn drop(&mut self) {
741        for &raw in self.render_passes.lock().values() {
742            unsafe { self.raw.destroy_render_pass(raw, None) };
743        }
744        if self.drop_guard.is_none() {
745            unsafe { self.raw.destroy_device(None) };
746        }
747    }
748}
749
/// The Vulkan implementation of `crate::Api::Device`.
pub struct Device {
    /// Shared device state.
    shared: Arc<DeviceShared>,
    /// Allocator for device memory; cleaned up when the `Device` is dropped.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor sets; cleaned up when the `Device` is dropped.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    /// NOTE(review): presumably a bitmask of the memory type indices this
    /// backend may allocate from — confirm.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    /// RenderDoc integration; only present with the `renderdoc` feature.
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    /// Shared HAL counters.
    counters: Arc<wgt::HalCounters>,
}
761
762impl Drop for Device {
763    fn drop(&mut self) {
764        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
765        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
766    }
767}
768
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// This type is `Clone` because `RelaySemaphores::advance` snapshots the
/// current state with `self.clone()` before building the next one.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
802
803impl RelaySemaphores {
804    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
805        Ok(Self {
806            wait: None,
807            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
808        })
809    }
810
811    /// Advances the semaphores, returning the semaphores that should be used for a submission.
812    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
813        let old = self.clone();
814
815        // Build the state for the next submission.
816        match self.wait {
817            None => {
818                // The `old` values describe the first submission to this queue.
819                // The second submission should wait on `old.signal`, and then
820                // signal a new semaphore which we'll create now.
821                self.wait = Some(old.signal);
822                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
823            }
824            Some(ref mut wait) => {
825                // What this submission signals, the next should wait.
826                mem::swap(wait, &mut self.signal);
827            }
828        };
829
830        Ok(old)
831    }
832
833    /// Destroys the semaphores.
834    unsafe fn destroy(&self, device: &ash::Device) {
835        unsafe {
836            if let Some(wait) = self.wait {
837                device.destroy_semaphore(wait, None);
838            }
839            device.destroy_semaphore(self.signal, None);
840        }
841    }
842}
843
/// The Vulkan implementation of a `wgpu-hal` queue.
pub struct Queue {
    /// The raw Vulkan queue handle.
    raw: vk::Queue,
    /// Swapchain extension entry points; `present` calls `queue_present`
    /// through this.
    swapchain_fn: khr::swapchain::Device,
    /// Shared device state.
    device: Arc<DeviceShared>,
    /// NOTE(review): presumably the index of the queue family `raw` belongs
    /// to — confirm against the code that constructs the queue.
    family_index: u32,
    /// Semaphores that chain each submission to the previous one; see
    /// [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores registered through `add_signal_semaphore`. `submit`
    /// appends them to the list of semaphores its submission signals.
    signal_semaphores: Mutex<SemaphoreList>,
}
852
impl Queue {
    /// Returns the raw `vk::Queue` handle wrapped by this queue.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
858
impl Drop for Queue {
    /// Destroys the relay semaphores owned by this queue.
    fn drop(&mut self) {
        // NOTE(review): `destroy` is unsafe; presumably no submission may still
        // be using the relay semaphores when the queue is dropped — confirm.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// The device memory that backs a [`Buffer`].
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A memory block obtained from the `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A range inside a raw `vk::DeviceMemory` object, as passed to
    /// `Buffer::from_raw_managed`.
    VulkanMemory {
        /// The raw memory object.
        memory: vk::DeviceMemory,
        /// Offset of the range within `memory`.
        offset: u64,
        /// Size of the range.
        size: u64,
    },
}
873impl BufferMemoryBacking {
874    fn memory(&self) -> &vk::DeviceMemory {
875        match self {
876            Self::Managed(m) => m.memory(),
877            Self::VulkanMemory { memory, .. } => memory,
878        }
879    }
880    fn offset(&self) -> u64 {
881        match self {
882            Self::Managed(m) => m.offset(),
883            Self::VulkanMemory { offset, .. } => *offset,
884        }
885    }
886    fn size(&self) -> u64 {
887        match self {
888            Self::Managed(m) => m.size(),
889            Self::VulkanMemory { size, .. } => *size,
890        }
891    }
892}
/// The Vulkan implementation of a `wgpu-hal` buffer.
#[derive(Debug)]
pub struct Buffer {
    /// The raw Vulkan buffer handle.
    raw: vk::Buffer,
    /// The memory backing the buffer. `None` for buffers created with
    /// `Buffer::from_raw`, whose memory is managed by the caller.
    block: Option<Mutex<BufferMemoryBacking>>,
}
898impl Buffer {
899    /// # Safety
900    ///
901    /// - `vk_buffer`'s memory must be managed by the caller
902    /// - Externally imported buffers can't be mapped by `wgpu`
903    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
904        Self {
905            raw: vk_buffer,
906            block: None,
907        }
908    }
909    /// # Safety
910    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
911    /// - Externally imported buffers can't be mapped by `wgpu`
912    /// - `offset` and `size` must be valid with the allocation of `memory`
913    pub unsafe fn from_raw_managed(
914        vk_buffer: vk::Buffer,
915        memory: vk::DeviceMemory,
916        offset: u64,
917        size: u64,
918    ) -> Self {
919        Self {
920            raw: vk_buffer,
921            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
922                memory,
923                offset,
924                size,
925            })),
926        }
927    }
928}
929
930impl crate::DynBuffer for Buffer {}
931
/// The Vulkan implementation of a `wgpu-hal` acceleration structure.
#[derive(Debug)]
pub struct AccelerationStructure {
    /// The raw Vulkan acceleration structure handle.
    raw: vk::AccelerationStructureKHR,
    /// NOTE(review): presumably the buffer that provides storage for `raw` — confirm.
    buffer: vk::Buffer,
    /// Memory block from the `gpu_alloc` allocator; presumably the memory
    /// bound to `buffer` — confirm.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Optional query pool; going by the name, used to query the compacted
    /// size of the acceleration structure — confirm.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
941
/// The Vulkan implementation of a `wgpu-hal` texture.
#[derive(Debug)]
pub struct Texture {
    /// The raw Vulkan image handle.
    raw: vk::Image,
    /// Optional drop guard. NOTE(review): presumably set when the image is
    /// owned by someone else and must not be destroyed by us — confirm.
    drop_guard: Option<crate::DropGuard>,
    /// Optional raw memory object. NOTE(review): presumably memory imported
    /// from outside the allocator — confirm.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory block from the `gpu_alloc` allocator, if the image has one.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// The `wgpu` format of the texture.
    format: wgt::TextureFormat,
    /// NOTE(review): presumably the extent used to bound copy operations — confirm.
    copy_size: crate::CopyExtent,
    /// Identity value for this texture; see [`ResourceIdentity`].
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}
954
impl Texture {
    /// Returns the raw `vk::Image` handle of this texture.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
963
/// The Vulkan implementation of a `wgpu-hal` texture view.
#[derive(Debug)]
pub struct TextureView {
    /// The raw handle of the image this view refers to.
    raw_texture: vk::Image,
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// NOTE(review): presumably the number of array layers covered by the view — confirm.
    layers: NonZeroU32,
    /// The `wgpu` format of the view.
    format: wgt::TextureFormat,
    /// The Vulkan format of the view.
    raw_format: vk::Format,
    /// NOTE(review): presumably the first mip level visible through the view — confirm.
    base_mip_level: u32,
    /// The dimensionality of the view.
    dimension: wgt::TextureViewDimension,
    /// Identity of the texture this view refers to; see [`ResourceIdentity`].
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of the view itself; see [`ResourceIdentity`].
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}
978
979impl TextureView {
980    /// # Safety
981    ///
982    /// - The image view handle must not be manually destroyed
983    pub unsafe fn raw_handle(&self) -> vk::ImageView {
984        self.raw
985    }
986
987    /// Returns the raw texture view, along with its identity.
988    fn identified_raw_view(&self) -> IdentifiedTextureView {
989        IdentifiedTextureView {
990            raw: self.raw,
991            identity: self.view_identity,
992        }
993    }
994}
995
/// The Vulkan implementation of a `wgpu-hal` sampler.
#[derive(Debug)]
pub struct Sampler {
    /// The raw Vulkan sampler handle.
    raw: vk::Sampler,
    /// NOTE(review): presumably the create info `raw` was created from — confirm.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1003
/// The Vulkan implementation of a `wgpu-hal` bind group layout.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The raw Vulkan descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    /// Descriptor counts in the form the `gpu_descriptor` allocator uses.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Pairs of descriptor type and a `u32`.
    /// NOTE(review): the `u32` is presumably the descriptor count of each
    /// binding — confirm.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1014
/// The Vulkan implementation of a `wgpu-hal` pipeline layout.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw Vulkan pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map in the form naga's SPIR-V backend uses.
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1022
/// The Vulkan implementation of a `wgpu-hal` bind group.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set, as handed out by the `gpu_descriptor` allocator.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1029
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Scratch bytes used by `make_c_str` to build a NUL-terminated string.
    marker: Vec<u8>,
    /// Scratch list of buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Scratch list of image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1037
1038impl Temp {
1039    fn clear(&mut self) {
1040        self.marker.clear();
1041        self.buffer_barriers.clear();
1042        self.image_barriers.clear();
1043    }
1044
1045    fn make_c_str(&mut self, name: &str) -> &CStr {
1046        self.marker.clear();
1047        self.marker.extend_from_slice(name.as_bytes());
1048        self.marker.push(0);
1049        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1050    }
1051}
1052
/// Hands out identity values for resources of type `T`.
///
/// Vulkan handles are not permanently unique, so a handle alone cannot tell
/// an old resource from a new one. Each factory numbers its resources with a
/// counter instead.
struct ResourceIdentityFactory<T> {
    /// Counter for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// Counter for targets with 64-bit atomics.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first identity has the id `0`.
    fn new() -> Self {
        #[cfg(not(target_has_atomic = "64"))]
        let next_id = Mutex::new(0);
        #[cfg(target_has_atomic = "64")]
        let next_id = core::sync::atomic::AtomicU64::new(0);

        Self {
            next_id,
            _phantom: PhantomData,
        }
    }

    /// Returns a fresh identity for a resource of type `T`.
    ///
    /// Each call yields the current counter value and then increments the
    /// counter by one.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// The identity of one resource of type `T`.
///
/// Intended as a hashable key that, unlike a raw Vulkan handle, stays unique
/// for the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1108
/// Hash map key identifying a framebuffer: a render pass, its attachments and
/// an extent.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass the framebuffer is used with.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// The extent of the framebuffer.
    extent: wgt::Extent3d,
}
1122
1123impl FramebufferKey {
1124    fn push_view(&mut self, view: IdentifiedTextureView) {
1125        self.attachment_identities.push(view.identity);
1126        self.attachment_views.push(view.raw);
1127    }
1128}
1129
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// The identity of the view; see [`ResourceIdentity`].
    identity: ResourceIdentity<vk::ImageView>,
}
1136
/// Hash map key for the temporary texture views cached by a
/// [`CommandEncoder`].
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// The raw handle of the image the view is created for.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// The Vulkan format of the view.
    format: vk::Format,
    /// NOTE(review): presumably the single mip level the view selects — confirm.
    mip_level: u32,
    /// NOTE(review): presumably the depth slice (or layer) the view selects — confirm.
    depth_slice: u32,
}
1148
/// The Vulkan implementation of a `wgpu-hal` command encoder.
pub struct CommandEncoder {
    /// The command pool owned by this encoder; destroyed when the encoder is
    /// dropped.
    raw: vk::CommandPool,
    /// Shared device state.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by this encoder; destroyed when the encoder is
    /// dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary texture views cached by this encoder; destroyed when the
    /// encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Internal HAL counters; `command_encoders` is decremented when the
    /// encoder is dropped.
    counters: Arc<wgt::HalCounters>,
}
1190
1191impl Drop for CommandEncoder {
1192    fn drop(&mut self) {
1193        // SAFETY:
1194        //
1195        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1196        // `CommandBuffer` must live until its execution is complete, and that a
1197        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1198        // Thus, we know that none of our `CommandBuffers` are in the "pending"
1199        // state.
1200        //
1201        // The other VUIDs are pretty obvious.
1202        unsafe {
1203            // `vkDestroyCommandPool` also frees any command buffers allocated
1204            // from that pool, so there's no need to explicitly call
1205            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1206            // fields.
1207            self.device.raw.destroy_command_pool(self.raw, None);
1208        }
1209
1210        for (_, fb) in self.framebuffers.drain() {
1211            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1212        }
1213
1214        for (_, view) in self.temp_texture_views.drain() {
1215            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1216        }
1217
1218        self.counters.command_encoders.sub(1);
1219    }
1220}
1221
impl CommandEncoder {
    /// Returns the raw handle of the `active` command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1230
1231impl fmt::Debug for CommandEncoder {
1232    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1233        f.debug_struct("CommandEncoder")
1234            .field("raw", &self.raw)
1235            .finish()
1236    }
1237}
1238
/// The Vulkan implementation of a `wgpu-hal` command buffer.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw Vulkan command buffer handle.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1245
/// The Vulkan implementation of a `wgpu-hal` shader module.
///
/// NOTE(review): the `large_enum_variant` lint is silenced, presumably because
/// `Intermediate` is much larger than `Raw` and boxing it was not considered
/// worthwhile — confirm.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// A Vulkan shader module.
    Raw(vk::ShaderModule),
    /// A shader kept as a naga module instead of a Vulkan shader module.
    /// NOTE(review): presumably translated to SPIR-V later, e.g. when a
    /// pipeline is created — confirm.
    Intermediate {
        /// The naga shader.
        naga_shader: crate::NagaShader,
        /// The runtime checks requested for this shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1257
/// The Vulkan implementation of a `wgpu-hal` render pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1264
/// The Vulkan implementation of a `wgpu-hal` compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1271
/// The Vulkan implementation of a `wgpu-hal` pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1278
/// The Vulkan implementation of a `wgpu-hal` query set.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1285
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest value known to be reached, as of the last call to
        /// `maintain`.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Fences that `maintain` has reset and that `submit` may reuse.
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1336
1337impl Fence {
1338    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1339    ///
1340    /// As an optimization, assume that we already know that the fence has
1341    /// reached `last_completed`, and don't bother checking fences whose values
1342    /// are less than that: those fences remain in the `active` array only
1343    /// because we haven't called `maintain` yet to clean them up.
1344    ///
1345    /// [`FenceValue`]: crate::FenceValue
1346    fn check_active(
1347        device: &ash::Device,
1348        mut last_completed: crate::FenceValue,
1349        active: &[(crate::FenceValue, vk::Fence)],
1350    ) -> Result<crate::FenceValue, crate::DeviceError> {
1351        for &(value, raw) in active.iter() {
1352            unsafe {
1353                if value > last_completed
1354                    && device
1355                        .get_fence_status(raw)
1356                        .map_err(map_host_device_oom_and_lost_err)?
1357                {
1358                    last_completed = value;
1359                }
1360            }
1361        }
1362        Ok(last_completed)
1363    }
1364
1365    /// Return the highest signalled [`FenceValue`] for `self`.
1366    ///
1367    /// [`FenceValue`]: crate::FenceValue
1368    fn get_latest(
1369        &self,
1370        device: &ash::Device,
1371        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1372    ) -> Result<crate::FenceValue, crate::DeviceError> {
1373        match *self {
1374            Self::TimelineSemaphore(raw) => unsafe {
1375                Ok(match *extension.unwrap() {
1376                    ExtensionFn::Extension(ref ext) => ext
1377                        .get_semaphore_counter_value(raw)
1378                        .map_err(map_host_device_oom_and_lost_err)?,
1379                    ExtensionFn::Promoted => device
1380                        .get_semaphore_counter_value(raw)
1381                        .map_err(map_host_device_oom_and_lost_err)?,
1382                })
1383            },
1384            Self::FencePool {
1385                last_completed,
1386                ref active,
1387                free: _,
1388            } => Self::check_active(device, last_completed, active),
1389        }
1390    }
1391
1392    /// Trim the internal state of this [`Fence`].
1393    ///
1394    /// This function has no externally visible effect, but you should call it
1395    /// periodically to keep this fence's resource consumption under control.
1396    ///
1397    /// For fences using the [`FencePool`] implementation, this function
1398    /// recycles fences that have been signaled. If you don't call this,
1399    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1400    /// time it's called.
1401    ///
1402    /// [`FencePool`]: Fence::FencePool
1403    /// [`Queue::submit`]: crate::Queue::submit
1404    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1405        match *self {
1406            Self::TimelineSemaphore(_) => {}
1407            Self::FencePool {
1408                ref mut last_completed,
1409                ref mut active,
1410                ref mut free,
1411            } => {
1412                let latest = Self::check_active(device, *last_completed, active)?;
1413                let base_free = free.len();
1414                for &(value, raw) in active.iter() {
1415                    if value <= latest {
1416                        free.push(raw);
1417                    }
1418                }
1419                if free.len() != base_free {
1420                    active.retain(|&(value, _)| value > latest);
1421                    unsafe { device.reset_fences(&free[base_free..]) }
1422                        .map_err(map_device_oom_err)?
1423                }
1424                *last_completed = latest;
1425            }
1426        }
1427        Ok(())
1428    }
1429}
1430
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` to the Vulkan queue in a single
    /// `vkQueueSubmit` call.
    ///
    /// The submission waits on the acquire semaphores of `surface_textures`
    /// (where one is pending) and on the relay semaphore of the previous
    /// submission. It signals the present semaphores of the surface textures,
    /// any semaphores registered via `add_signal_semaphore`, the next relay
    /// semaphore, and `signal_fence` with `signal_value`.
    ///
    /// # Panics
    ///
    /// Panics if the acquire or present semaphores of a surface texture are
    /// already locked.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless `signal_fence` is a `FencePool`.
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        //
        // NOTE(review): the locking below uses `try_lock().expect(..)`, so a
        // duplicate would panic there rather than block.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Lock the semaphore state of every surface texture up front.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Pick up the semaphores registered through `add_signal_semaphore`.
        // Note that `guard` stays locked until the end of this function.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is available, otherwise
                // create a new one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage for the timeline semaphore info, handed to `add_to_submit`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` on the swapchain of `surface`.
    ///
    /// Returns `SurfaceError::Outdated` or `SurfaceError::Lost` for the
    /// corresponding Vulkan errors. A suboptimal present is still a success;
    /// it is only logged, and not at all on Android.
    ///
    /// # Panics
    ///
    /// Panics if `surface` has no swapchain.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        // Declared out here so that they outlive `vk_info`, which borrows them
        // when a present time is set.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the timestamp period stored on the shared device state.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1632
1633impl Queue {
1634    pub fn raw_device(&self) -> &ash::Device {
1635        &self.device.raw
1636    }
1637
1638    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1639        let mut guard = self.signal_semaphores.lock();
1640        if let Some(value) = semaphore_value {
1641            guard.push_timeline(semaphore, value);
1642        } else {
1643            guard.push_binary(semaphore);
1644        }
1645    }
1646}
1647
1648/// Maps
1649///
1650/// - VK_ERROR_OUT_OF_HOST_MEMORY
1651/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1652fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1653    match err {
1654        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1655            get_oom_err(err)
1656        }
1657        e => get_unexpected_err(e),
1658    }
1659}
1660
1661/// Maps
1662///
1663/// - VK_ERROR_OUT_OF_HOST_MEMORY
1664/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1665/// - VK_ERROR_DEVICE_LOST
1666fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1667    match err {
1668        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1669        other => map_host_device_oom_err(other),
1670    }
1671}
1672
/// Maps the errors of calls that may return
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling here: the whole job is
/// delegated to `map_host_device_oom_err`.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR is not handled separately because
    // we don't use VK_KHR_buffer_device_address.
    map_host_device_oom_err(err)
}
1683
1684/// Maps
1685///
1686/// - VK_ERROR_OUT_OF_HOST_MEMORY
1687fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1688    match err {
1689        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1690        e => get_unexpected_err(e),
1691    }
1692}
1693
1694/// Maps
1695///
1696/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1697fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1698    match err {
1699        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1700        e => get_unexpected_err(e),
1701    }
1702}
1703
/// Maps the errors of calls that may return
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling here: the whole job is
/// delegated to `map_host_oom_err`.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR is not handled separately because
    // we don't use VK_KHR_buffer_device_address.
    map_host_oom_err(err)
}
1713
/// Maps the errors of pipeline-creation calls that may return
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// Only the two out-of-memory codes are of interest; the whole job is delegated to
/// `map_host_device_oom_err`.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // VK_PIPELINE_COMPILE_REQUIRED_EXT is not handled separately because
    // we don't use VK_EXT_pipeline_creation_cache_control.
    // VK_ERROR_INVALID_SHADER_NV is not handled separately because
    // we don't use VK_NV_glsl_shader.
    map_host_device_oom_err(err)
}
1727
1728/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1729/// feature flag is enabled.
1730fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1731    #[cfg(feature = "internal_error_panic")]
1732    panic!("Unexpected Vulkan error: {_err:?}");
1733
1734    #[allow(unreachable_code)]
1735    crate::DeviceError::Unexpected
1736}
1737
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The `_err` argument is accepted but not inspected: the result is the same for
/// whichever error code the caller passes in.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1742
1743/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1744/// feature flag is enabled.
1745fn get_lost_err() -> crate::DeviceError {
1746    #[cfg(feature = "device_lost_panic")]
1747    panic!("Device lost");
1748
1749    #[allow(unreachable_code)]
1750    crate::DeviceError::Lost
1751}
1752
/// Plain-old-data record describing one TLAS instance, with a fixed `#[repr(C)]` layout.
///
/// NOTE(review): the field set appears to mirror Vulkan's
/// `VkAccelerationStructureInstanceKHR` — confirm against the code that fills it in.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Twelve `f32` values; presumably a row-major 3x4 transform matrix — TODO confirm.
    transform: [f32; 12],
    /// Custom data and visibility mask packed into a single `u32`
    /// (exact bit split is not visible here — verify against the writer).
    custom_data_and_mask: u32,
    /// Shader binding table record offset and instance flags packed into a single `u32`
    /// (exact bit split is not visible here — verify against the writer).
    shader_binding_table_record_offset_and_flags: u32,
    /// 64-bit reference to the acceleration structure this instance points at.
    acceleration_structure_reference: u64,
}
1761
1762/// Arguments to the [`CreateDeviceCallback`].
1763pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
1764where
1765    'this: 'pnext,
1766{
1767    /// The extensions to enable for the device. You must not remove anything from this list,
1768    /// but you may add to it.
1769    pub extensions: &'arg mut Vec<&'static CStr>,
1770    /// The physical device features to enable. You may enable features, but must not disable any.
1771    pub device_features: &'arg mut PhysicalDeviceFeatures,
1772    /// The queue create infos for the device. You may add or modify queue create infos as needed.
1773    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
1774    /// The create info for the device. You may add or modify things in the pnext chain, but
1775    /// do not turn features off. Additionally, do not add things to the list of extensions,
1776    /// or to the feature set, as all changes to that member will be overwritten.
1777    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
1778    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1779    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1780    /// don't actually directly use `'this`
1781    _phantom: PhantomData<&'this ()>,
1782}
1783
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create
///   info, as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not change anything to something the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1793
1794/// Arguments to the [`CreateInstanceCallback`].
1795pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
1796where
1797    'this: 'pnext,
1798{
1799    /// The extensions to enable for the instance. You must not remove anything from this list,
1800    /// but you may add to it.
1801    pub extensions: &'arg mut Vec<&'static CStr>,
1802    /// The create info for the instance. You may add or modify things in the pnext chain, but
1803    /// do not turn features off. Additionally, do not add things to the list of extensions,
1804    /// all changes to that member will be overwritten.
1805    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
1806    /// Vulkan entry point.
1807    pub entry: &'arg ash::Entry,
1808    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
1809    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
1810    /// don't actually directly use `'this`
1811    _phantom: PhantomData<&'this ()>,
1812}
1813
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create
///   info, as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not change anything to something the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;