wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
Ash expects slices, which we don't generally have available.
We cope with this requirement using a combination of the following approaches:
  - temporarily allocating a `Vec` on the heap, where the overhead is permitted
  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Number of nanoseconds in one millisecond.
const MILLIS_TO_NANOS: u64 = 1_000_000;
/// Upper bound on the number of attachments handled at once: twice the color
/// attachment limit, plus one.
///
/// NOTE(review): presumably the doubling accounts for one resolve target per
/// color attachment and the extra slot is the depth/stencil attachment —
/// confirm against the users of this constant.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Zero-sized marker type identifying the Vulkan backend.
///
/// All of the backend's concrete types are reachable through its
/// [`crate::Api`] implementation.
#[derive(Clone, Debug)]
pub struct Api;
57
58impl crate::Api for Api {
59    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
60
61    type Instance = Instance;
62    type Surface = Surface;
63    type Adapter = Adapter;
64    type Device = Device;
65
66    type Queue = Queue;
67    type CommandEncoder = CommandEncoder;
68    type CommandBuffer = CommandBuffer;
69
70    type Buffer = Buffer;
71    type Texture = Texture;
72    type SurfaceTexture = SurfaceTexture;
73    type TextureView = TextureView;
74    type Sampler = Sampler;
75    type QuerySet = QuerySet;
76    type Fence = Fence;
77    type AccelerationStructure = AccelerationStructure;
78    type PipelineCache = PipelineCache;
79
80    type BindGroupLayout = BindGroupLayout;
81    type BindGroup = BindGroup;
82    type PipelineLayout = PipelineLayout;
83    type ShaderModule = ShaderModule;
84    type RenderPipeline = RenderPipeline;
85    type ComputePipeline = ComputePipeline;
86}
87
// Apply `crate::impl_dyn_resource!` to every Vulkan backend type listed
// below. The macro is defined elsewhere in the crate.
// NOTE(review): presumably it implements the dynamic-resource trait for each
// type — confirm against the macro definition.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
112
/// State kept alive for an installed debug-utils messenger.
struct DebugUtils {
    /// Loaded instance-level functions of the debug-utils extension.
    extension: ext::debug_utils::Instance,
    /// Handle of the debug messenger.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Never read through this field; it exists only to own the allocation.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
125
/// Parameters describing a debug-utils messenger to be created.
///
/// NOTE(review): presumably consumed during instance creation to build the
/// messenger stored in `DebugUtils` — confirm in the `instance` module.
pub struct DebugUtilsCreateInfo {
    /// Message severities to report.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types to report.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data made available to the messenger callback.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
131
#[derive(Debug)]
/// Properties of the validation layer that the debug-utils messenger
/// callback needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
142
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, or `None` if the
    /// validation layer is not present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// its layer, so there is no reason to record the version.
    has_obs_layer: bool,
}
156
/// Instance-level state, shared through an [`Arc`] by [`Instance`],
/// [`Surface`], [`Adapter`] and the device's shared state.
pub struct InstanceShared {
    /// The `ash` wrapper around the Vulkan instance.
    raw: ash::Instance,
    /// Instance extension names.
    // NOTE(review): presumably the extensions enabled at creation — confirm.
    extensions: Vec<&'static CStr>,
    // NOTE(review): presumably `Some` when the raw instance is owned
    // externally and must not be destroyed here — confirm in `Drop`.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if a messenger was installed.
    debug_utils: Option<DebugUtils>,
    /// Loaded functions of `get_physical_device_properties2`, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry point (loader).
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
176
/// Vulkan backend instance: a handle to reference-counted [`InstanceShared`]
/// state.
pub struct Instance {
    /// State shared with the objects created from this instance.
    shared: Arc<InstanceShared>,
}
180
/// Semaphore used to acquire a swapchain image, together with the
/// bookkeeping needed to reuse it safely across frames.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// Starts at `0` and is updated through `set_used_fence_value`.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
229
230impl SwapchainAcquireSemaphore {
231    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
232        Ok(Self {
233            acquire: device
234                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
235            should_wait_for_acquire: true,
236            previously_used_submission_index: 0,
237        })
238    }
239
240    /// Sets the fence value which the next acquire will wait for. This prevents
241    /// the semaphore from being used while the previous submission is still in flight.
242    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
243        self.previously_used_submission_index = value;
244    }
245
246    /// Return the semaphore that commands drawing to this image should wait for, if any.
247    ///
248    /// This only returns `Some` once per acquisition; see
249    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
250    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
251        if self.should_wait_for_acquire {
252            self.should_wait_for_acquire = false;
253            Some(self.acquire)
254        } else {
255            None
256        }
257    }
258
259    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
260    /// so reset internal state to be ready for the next frame.
261    fn end_semaphore_usage(&mut self) {
262        // Reset the acquire semaphore, so that the next time we acquire this
263        // image, we can wait for it again.
264        self.should_wait_for_acquire = true;
265    }
266
267    unsafe fn destroy(&self, device: &ash::Device) {
268        unsafe {
269            device.destroy_semaphore(self.acquire, None);
270        }
271    }
272}
273
/// Per-image pool of semaphores used to order presentation after the
/// submissions that draw to the image.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] that have been handed out to
    /// submissions since the last reset, i.e. the ones that will be signalled
    /// and must be waited on by the next present.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    ///
    /// Only used to label newly created semaphores.
    frame_index: usize,
}
320
321impl SwapchainPresentSemaphores {
322    pub fn new(frame_index: usize) -> Self {
323        Self {
324            present: Vec::new(),
325            present_index: 0,
326            frame_index,
327        }
328    }
329
330    /// Return the semaphore that the next submission that writes to this image should
331    /// signal when it's done.
332    ///
333    /// See [`SwapchainPresentSemaphores::present`] for details.
334    fn get_submit_signal_semaphore(
335        &mut self,
336        device: &DeviceShared,
337    ) -> Result<vk::Semaphore, crate::DeviceError> {
338        // Try to recycle a semaphore we created for a previous presentation.
339        let sem = match self.present.get(self.present_index) {
340            Some(sem) => *sem,
341            None => {
342                let sem = device.new_binary_semaphore(&format!(
343                    "SwapchainImageSemaphore: Image {} present semaphore {}",
344                    self.frame_index, self.present_index
345                ))?;
346                self.present.push(sem);
347                sem
348            }
349        };
350
351        self.present_index += 1;
352
353        Ok(sem)
354    }
355
356    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
357    /// so reset internal state to be ready for the next frame.
358    fn end_semaphore_usage(&mut self) {
359        // Reset the index to 0, so that the next time we get a semaphore, we
360        // start from the beginning of the list.
361        self.present_index = 0;
362    }
363
364    /// Return the semaphores that a presentation of this image should wait on.
365    ///
366    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
367    /// ends this image's acquisition should wait for. See
368    /// [`SwapchainPresentSemaphores::present`] for details.
369    ///
370    /// Reset `self` to be ready for the next acquisition cycle.
371    ///
372    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
373    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
374        self.present[0..self.present_index].to_vec()
375    }
376
377    unsafe fn destroy(&self, device: &ash::Device) {
378        unsafe {
379            for sem in &self.present {
380                device.destroy_semaphore(*sem, None);
381            }
382        }
383    }
384}
385
/// A configured swapchain together with the synchronization objects used to
/// acquire and present its images.
struct Swapchain {
    /// Raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Loaded device-level swapchain extension functions.
    functor: khr::swapchain::Device,
    /// Shared state of the device associated with this swapchain.
    device: Arc<DeviceShared>,
    /// Raw image handles.
    // NOTE(review): presumably the swapchain's images, indexed by swapchain
    // image index — confirm where this is populated.
    images: Vec<vk::Image>,
    /// The surface configuration stored with this swapchain.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
438
439impl Swapchain {
440    /// Mark the current frame finished, advancing to the next acquire semaphore.
441    fn advance_acquire_semaphore(&mut self) {
442        let semaphore_count = self.acquire_semaphores.len();
443        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
444    }
445
446    /// Get the next acquire semaphore that should be used with this swapchain.
447    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
448        self.acquire_semaphores[self.next_acquire_index].clone()
449    }
450
451    /// Get the set of present semaphores that should be used with the given image index.
452    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
453        self.present_semaphores[index as usize].clone()
454    }
455}
456
/// A Vulkan surface, plus its swapchain once the surface has been configured.
pub struct Surface {
    /// Raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded instance-level surface extension functions.
    functor: khr::surface::Instance,
    /// Shared state of the instance associated with this surface.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Swapchain>>,
}
463
464impl Surface {
465    /// Get the raw Vulkan swapchain associated with this surface.
466    ///
467    /// Returns [`None`] if the surface is not configured.
468    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
469        let read = self.swapchain.read();
470        read.as_ref().map(|it| it.raw)
471    }
472
473    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
474    /// using [VK_GOOGLE_display_timing].
475    ///
476    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
477    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
478    ///
479    /// This can also be used to add a "not before" timestamp to the presentation.
480    ///
481    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
482    ///
483    /// # Panics
484    ///
485    /// - If the surface hasn't been configured.
486    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
487    ///
488    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
489    #[track_caller]
490    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
491        let mut swapchain = self.swapchain.write();
492        let swapchain = swapchain
493            .as_mut()
494            .expect("Surface should have been configured");
495        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
496        if swapchain.device.features.contains(features) {
497            swapchain.next_present_time = Some(present_timing);
498        } else {
499            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
500            panic!(
501                concat!(
502                    "Tried to set display timing properties ",
503                    "without the corresponding feature ({:?}) enabled."
504                ),
505                features
506            );
507        }
508    }
509}
510
/// A swapchain image, bundled with the semaphores that synchronize its
/// acquisition and presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// Index associated with this texture.
    // NOTE(review): presumably the swapchain image index returned by
    // `vkAcquireNextImageKHR` — confirm where this is constructed.
    index: u32,
    /// The texture exposed through the `Borrow` implementations.
    texture: Texture,
    /// Shared handle to the acquire semaphore state used for this texture.
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Shared handle to the present semaphore set used for this texture.
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
518
// Marker implementation: no items are defined in the impl body.
impl crate::DynSurfaceTexture for SurfaceTexture {}
520
/// Lets a [`SurfaceTexture`] be borrowed as the [`Texture`] it wraps.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}
526
/// Lets a [`SurfaceTexture`] be borrowed as a type-erased
/// [`crate::DynTexture`], backed by the wrapped texture.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
532
/// A Vulkan physical device, together with the capabilities, features and
/// workarounds recorded for it.
pub struct Adapter {
    /// Raw physical device handle.
    raw: vk::PhysicalDevice,
    /// Shared state of the instance associated with this adapter.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities that steer backend code paths.
    private_caps: PrivateCapabilities,
    /// Driver workarounds recorded for this adapter.
    workarounds: Workarounds,
}
544
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of a device extension are to be found: in a loaded
/// extension table, or in core Vulkan.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
552
/// Function tables for optional device extensions.
///
/// NOTE(review): presumably a field is `None` when the corresponding
/// extension is not enabled on the device — confirm at the construction site.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Timeline semaphore functions, either loaded from the extension or
    /// promoted to core (see [`ExtensionFn`]).
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
560
/// Function tables of the device extensions grouped under ray tracing.
struct RayTracingDeviceExtensionFunctions {
    /// Loaded `acceleration_structure` device functions.
    acceleration_structure: khr::acceleration_structure::Device,
    /// Loaded `buffer_device_address` device functions.
    buffer_device_address: khr::buffer_device_address::Device,
}
565
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
///
/// Fields without their own documentation are plain capability flags or
/// limits named after what they describe.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    // NOTE(review): presumably selects between the timeline-semaphore and
    // fence-pool fence implementations described in the module docs — confirm.
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
636
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// Each flag switches on one driver-specific workaround; the individual
    /// flags describe the problem they address.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
670
/// Hashable description of a single attachment, used as part of the render
/// pass cache key.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    /// Attachment format.
    format: vk::Format,
    /// Image layout of the attachment.
    layout: vk::ImageLayout,
    /// Attachment operations.
    ops: crate::AttachmentOps,
}
677
678impl AttachmentKey {
679    /// Returns an attachment key for a compatible attachment.
680    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
681        Self {
682            format,
683            layout,
684            ops: crate::AttachmentOps::all(),
685        }
686    }
687}
688
/// Key for a color attachment and its optional resolve attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if there is one.
    resolve: Option<AttachmentKey>,
}
694
/// Key for a depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// Format, layout and operations of the attachment.
    // NOTE(review): presumably `base.ops` are the depth operations, given the
    // separate `stencil_ops` field below — confirm at the use site.
    base: AttachmentKey,
    /// Operations applied to the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
700
/// Key under which render passes are cached in `DeviceShared::render_passes`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// One entry per color attachment slot, up to
    /// `crate::MAX_COLOR_ATTACHMENTS`; a slot may be `None`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// The depth/stencil attachment, if any.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    /// Sample count of the render pass.
    sample_count: u32,
    /// Multiview setting, if any.
    multiview: Option<NonZeroU32>,
}
708
/// Device-level state shared, through an [`Arc`], between [`Device`] and the
/// objects that need access to it (such as a swapchain).
struct DeviceShared {
    /// The `ash` wrapper around the Vulkan device.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    /// Raw queue handle.
    raw_queue: vk::Queue,
    /// When this is `Some`, dropping `DeviceShared` does not destroy `raw`.
    drop_guard: Option<crate::DropGuard>,
    /// Shared state of the instance associated with this device.
    instance: Arc<InstanceShared>,
    /// Raw physical device handle.
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    /// Function tables for optional device extensions.
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    /// Internal capabilities that steer backend code paths.
    private_caps: PrivateCapabilities,
    /// Driver workarounds that are in effect.
    workarounds: Workarounds,
    /// Features recorded for this device; consulted, for example, by
    /// `Surface::set_next_present_time`.
    features: wgt::Features,
    /// Cache of render passes, destroyed when `DeviceShared` is dropped.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
737
738impl Drop for DeviceShared {
739    fn drop(&mut self) {
740        for &raw in self.render_passes.lock().values() {
741            unsafe { self.raw.destroy_render_pass(raw, None) };
742        }
743        if self.drop_guard.is_none() {
744            unsafe { self.raw.destroy_device(None) };
745        }
746    }
747}
748
/// A Vulkan logical device: the shared device state plus the memory and
/// descriptor allocators built on top of it.
pub struct Device {
    /// State shared with other objects that refer to this device.
    shared: Arc<DeviceShared>,
    /// Allocator for device memory; cleaned up when the `Device` is dropped.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor sets; cleaned up when the `Device` is dropped.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    /// RenderDoc integration, only with the `renderdoc` feature.
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
760
761impl Drop for Device {
762    fn drop(&mut self) {
763        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
764        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
765    }
766}
767
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` copies the semaphore handles only; no new Vulkan semaphores are
// created by cloning.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
801
802impl RelaySemaphores {
803    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
804        Ok(Self {
805            wait: None,
806            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
807        })
808    }
809
810    /// Advances the semaphores, returning the semaphores that should be used for a submission.
811    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
812        let old = self.clone();
813
814        // Build the state for the next submission.
815        match self.wait {
816            None => {
817                // The `old` values describe the first submission to this queue.
818                // The second submission should wait on `old.signal`, and then
819                // signal a new semaphore which we'll create now.
820                self.wait = Some(old.signal);
821                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
822            }
823            Some(ref mut wait) => {
824                // What this submission signals, the next should wait.
825                mem::swap(wait, &mut self.signal);
826            }
827        };
828
829        Ok(old)
830    }
831
832    /// Destroys the semaphores.
833    unsafe fn destroy(&self, device: &ash::Device) {
834        unsafe {
835            if let Some(wait) = self.wait {
836                device.destroy_semaphore(wait, None);
837            }
838            device.destroy_semaphore(self.signal, None);
839        }
840    }
841}
842
/// A Vulkan queue plus the state needed to order and signal submissions on it.
pub struct Queue {
    /// The raw Vulkan queue handle.
    raw: vk::Queue,
    /// `VK_KHR_swapchain` device functions, used for presentation.
    swapchain_fn: khr::swapchain::Device,
    /// The device this queue belongs to.
    device: Arc<DeviceShared>,
    // NOTE(review): presumably the index of the queue family `raw` was
    // retrieved from — confirm at the construction site.
    family_index: u32,
    /// Semaphores chaining each submission to the previous one.
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores registered via `add_signal_semaphore`, added to the
    /// signal list of the next submission.
    signal_semaphores: Mutex<SemaphoreList>,
}
851
impl Queue {
    /// Returns the raw `vk::Queue` handle wrapped by this queue.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
857
858impl Drop for Queue {
859    fn drop(&mut self) {
860        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
861    }
862}
/// The device memory backing a [`Buffer`].
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block handed out by the `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A range of a raw `vk::DeviceMemory` object described explicitly,
    /// as supplied to `Buffer::from_raw_managed`.
    VulkanMemory {
        /// The memory object.
        memory: vk::DeviceMemory,
        /// Offset of the range within `memory`.
        offset: u64,
        /// Size of the range.
        size: u64,
    },
}
872impl BufferMemoryBacking {
873    fn memory(&self) -> &vk::DeviceMemory {
874        match self {
875            Self::Managed(m) => m.memory(),
876            Self::VulkanMemory { memory, .. } => memory,
877        }
878    }
879    fn offset(&self) -> u64 {
880        match self {
881            Self::Managed(m) => m.offset(),
882            Self::VulkanMemory { offset, .. } => *offset,
883        }
884    }
885    fn size(&self) -> u64 {
886        match self {
887            Self::Managed(m) => m.size(),
888            Self::VulkanMemory { size, .. } => *size,
889        }
890    }
891}
/// A Vulkan buffer and, optionally, the memory backing it.
#[derive(Debug)]
pub struct Buffer {
    /// The raw Vulkan buffer handle.
    raw: vk::Buffer,
    /// The memory backing `raw`. `None` for buffers created through
    /// `Buffer::from_raw`, where no backing is recorded.
    block: Option<Mutex<BufferMemoryBacking>>,
}
897impl Buffer {
898    /// # Safety
899    ///
900    /// - `vk_buffer`'s memory must be managed by the caller
901    /// - Externally imported buffers can't be mapped by `wgpu`
902    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
903        Self {
904            raw: vk_buffer,
905            block: None,
906        }
907    }
908    /// # Safety
909    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
910    /// - Externally imported buffers can't be mapped by `wgpu`
911    /// - `offset` and `size` must be valid with the allocation of `memory`
912    pub unsafe fn from_raw_managed(
913        vk_buffer: vk::Buffer,
914        memory: vk::DeviceMemory,
915        offset: u64,
916        size: u64,
917    ) -> Self {
918        Self {
919            raw: vk_buffer,
920            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
921                memory,
922                offset,
923                size,
924            })),
925        }
926    }
927}
928
929impl crate::DynBuffer for Buffer {}
930
/// A Vulkan acceleration structure and the resources associated with it.
#[derive(Debug)]
pub struct AccelerationStructure {
    /// The raw acceleration structure handle.
    raw: vk::AccelerationStructureKHR,
    // NOTE(review): presumably the buffer that stores the acceleration
    // structure — confirm at the creation site.
    buffer: vk::Buffer,
    /// Allocator-managed memory block (presumably backing `buffer` — confirm).
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Optional query pool; by its name, used to query the compacted size.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
940
/// A Vulkan image with the metadata wgpu-hal keeps alongside it.
#[derive(Debug)]
pub struct Texture {
    /// The raw Vulkan image handle.
    raw: vk::Image,
    // NOTE(review): presumably set for externally owned images — confirm.
    drop_guard: Option<crate::DropGuard>,
    // NOTE(review): presumably memory imported from outside the allocator — confirm.
    external_memory: Option<vk::DeviceMemory>,
    /// Allocator-managed memory block for this image, if any.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// The wgpu texture format.
    format: wgt::TextureFormat,
    /// The texture's extent expressed as a copy extent.
    copy_size: crate::CopyExtent,
    /// Unique identity of this texture; unlike `raw`, it is never reused.
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}
953
impl Texture {
    /// Returns the raw `vk::Image` handle of this texture.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
962
/// A Vulkan image view with the metadata wgpu-hal keeps alongside it.
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Number of array layers (non-zero by construction of the type).
    layers: NonZeroU32,
    /// The wgpu format of the view.
    format: wgt::TextureFormat,
    /// The Vulkan format of the view.
    raw_format: vk::Format,
    /// First mip level (presumably the first one visible through the view — confirm).
    base_mip_level: u32,
    /// The dimensionality of the view.
    dimension: wgt::TextureViewDimension,
    /// Unique identity of the underlying texture.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Unique identity of this view; unlike `raw`, it is never reused.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}
977
978impl TextureView {
979    /// # Safety
980    ///
981    /// - The image view handle must not be manually destroyed
982    pub unsafe fn raw_handle(&self) -> vk::ImageView {
983        self.raw
984    }
985
986    /// Returns the raw texture view, along with its identity.
987    fn identified_raw_view(&self) -> IdentifiedTextureView {
988        IdentifiedTextureView {
989            raw: self.raw,
990            identity: self.view_identity,
991        }
992    }
993}
994
/// A Vulkan sampler together with the create info it is associated with.
#[derive(Debug)]
pub struct Sampler {
    /// The raw Vulkan sampler handle.
    raw: vk::Sampler,
    /// Creation parameters kept alongside the handle
    /// (presumably those `raw` was created with — confirm).
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1002
/// A Vulkan descriptor set layout with the bookkeeping needed to allocate
/// descriptor sets for it.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The raw descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    /// Descriptor counts, in the form used by `gpu_descriptor`.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Descriptor type paired with a `u32`
    /// (presumably the descriptor count per entry — confirm).
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1013
/// A Vulkan pipeline layout plus the binding map handed to naga's SPIR-V backend.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw pipeline layout handle.
    raw: vk::PipelineLayout,
    /// naga SPIR-V binding map (by its name, describing binding arrays).
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1021
/// A descriptor set allocated through `gpu_descriptor`.
#[derive(Debug)]
pub struct BindGroup {
    /// The allocated descriptor set.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1028
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
///
/// The vectors are kept around so their capacity can be reused instead of
/// allocating fresh storage each time.
#[derive(Default)]
struct Temp {
    /// Scratch bytes for building NUL-terminated strings (see `make_c_str`).
    marker: Vec<u8>,
    /// Scratch storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Scratch storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1036
1037impl Temp {
1038    fn clear(&mut self) {
1039        self.marker.clear();
1040        self.buffer_barriers.clear();
1041        self.image_barriers.clear();
1042    }
1043
1044    fn make_c_str(&mut self, name: &str) -> &CStr {
1045        self.marker.clear();
1046        self.marker.extend_from_slice(name.as_bytes());
1047        self.marker.push(0);
1048        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1049    }
1050}
1051
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
///
/// The counter is an `AtomicU64` where the target supports 64-bit atomics,
/// and a `Mutex<u64>` otherwise.
struct ResourceIdentityFactory<T> {
    /// The next ID to hand out (mutex-protected fallback).
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// The next ID to hand out.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to the resource type `T` without storing one.
    _phantom: PhantomData<T>,
}
1063
1064impl<T> ResourceIdentityFactory<T> {
1065    fn new() -> Self {
1066        Self {
1067            #[cfg(not(target_has_atomic = "64"))]
1068            next_id: Mutex::new(0),
1069            #[cfg(target_has_atomic = "64")]
1070            next_id: core::sync::atomic::AtomicU64::new(0),
1071            _phantom: PhantomData,
1072        }
1073    }
1074
1075    /// Returns a new unique ID for a resource of type `T`.
1076    fn next(&self) -> ResourceIdentity<T> {
1077        #[cfg(not(target_has_atomic = "64"))]
1078        {
1079            let mut next_id = self.next_id.lock();
1080            let id = *next_id;
1081            *next_id += 1;
1082            ResourceIdentity {
1083                id,
1084                _phantom: PhantomData,
1085            }
1086        }
1087
1088        #[cfg(target_has_atomic = "64")]
1089        ResourceIdentity {
1090            id: self
1091                .next_id
1092                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
1093            _phantom: PhantomData,
1094        }
1095    }
1096}
1097
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
///
/// Values are produced by `ResourceIdentityFactory::next`.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// The numeric identity.
    id: u64,
    /// Ties the identity to the resource type `T` without storing one.
    _phantom: PhantomData<T>,
}
1107
/// Hash-map key identifying a cached framebuffer.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass the framebuffer is used with.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// The framebuffer extent.
    extent: wgt::Extent3d,
}
1121
1122impl FramebufferKey {
1123    fn push_view(&mut self, view: IdentifiedTextureView) {
1124        self.attachment_identities.push(view.identity);
1125        self.attachment_views.push(view.raw);
1126    }
1127}
1128
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// The unique identity associated with `raw`.
    identity: ResourceIdentity<vk::ImageView>,
}
1135
/// Hash-map key identifying a cached temporary texture view.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// The image the view refers to.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// The Vulkan format of the view.
    format: vk::Format,
    /// The mip level the view selects.
    mip_level: u32,
    /// The depth slice the view selects.
    depth_slice: u32,
}
1147
/// A Vulkan command pool plus the recording state and caches tied to it.
pub struct CommandEncoder {
    /// The command pool; destroyed when the encoder is dropped.
    raw: vk::CommandPool,
    /// The device this encoder belongs to.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by this encoder; destroyed when it is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary texture views cached by this encoder; destroyed when it is
    /// dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared HAL counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1189
1190impl Drop for CommandEncoder {
1191    fn drop(&mut self) {
1192        // SAFETY:
1193        //
1194        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1195        // `CommandBuffer` must live until its execution is complete, and that a
1196        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1197        // Thus, we know that none of our `CommandBuffers` are in the "pending"
1198        // state.
1199        //
1200        // The other VUIDs are pretty obvious.
1201        unsafe {
1202            // `vkDestroyCommandPool` also frees any command buffers allocated
1203            // from that pool, so there's no need to explicitly call
1204            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1205            // fields.
1206            self.device.raw.destroy_command_pool(self.raw, None);
1207        }
1208
1209        for (_, fb) in self.framebuffers.drain() {
1210            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1211        }
1212
1213        for (_, view) in self.temp_texture_views.drain() {
1214            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1215        }
1216
1217        self.counters.command_encoders.sub(1);
1218    }
1219}
1220
impl CommandEncoder {
    /// Returns the handle stored in `active`, i.e. the command buffer
    /// currently being recorded (a null handle when there is none).
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1229
1230impl fmt::Debug for CommandEncoder {
1231    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1232        f.debug_struct("CommandEncoder")
1233            .field("raw", &self.raw)
1234            .finish()
1235    }
1236}
1237
/// A recorded Vulkan command buffer, ready to be passed to `Queue::submit`.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw Vulkan command buffer handle.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1244
/// A shader module, either already compiled for Vulkan or still in naga form.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An existing Vulkan shader module.
    Raw(vk::ShaderModule),
    /// A naga shader kept in intermediate form
    /// (presumably translated to SPIR-V at pipeline creation — confirm).
    Intermediate {
        /// The naga module and its associated info.
        naga_shader: crate::NagaShader,
        /// Which runtime checks to apply to the shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1256
/// A Vulkan graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1263
/// A Vulkan compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1270
/// A Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1277
/// A Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1284
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest fence value known to have completed.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Fences that have been reset and can be reused by a later submission.
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1335
1336impl Fence {
1337    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1338    ///
1339    /// As an optimization, assume that we already know that the fence has
1340    /// reached `last_completed`, and don't bother checking fences whose values
1341    /// are less than that: those fences remain in the `active` array only
1342    /// because we haven't called `maintain` yet to clean them up.
1343    ///
1344    /// [`FenceValue`]: crate::FenceValue
1345    fn check_active(
1346        device: &ash::Device,
1347        mut last_completed: crate::FenceValue,
1348        active: &[(crate::FenceValue, vk::Fence)],
1349    ) -> Result<crate::FenceValue, crate::DeviceError> {
1350        for &(value, raw) in active.iter() {
1351            unsafe {
1352                if value > last_completed
1353                    && device
1354                        .get_fence_status(raw)
1355                        .map_err(map_host_device_oom_and_lost_err)?
1356                {
1357                    last_completed = value;
1358                }
1359            }
1360        }
1361        Ok(last_completed)
1362    }
1363
1364    /// Return the highest signalled [`FenceValue`] for `self`.
1365    ///
1366    /// [`FenceValue`]: crate::FenceValue
1367    fn get_latest(
1368        &self,
1369        device: &ash::Device,
1370        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1371    ) -> Result<crate::FenceValue, crate::DeviceError> {
1372        match *self {
1373            Self::TimelineSemaphore(raw) => unsafe {
1374                Ok(match *extension.unwrap() {
1375                    ExtensionFn::Extension(ref ext) => ext
1376                        .get_semaphore_counter_value(raw)
1377                        .map_err(map_host_device_oom_and_lost_err)?,
1378                    ExtensionFn::Promoted => device
1379                        .get_semaphore_counter_value(raw)
1380                        .map_err(map_host_device_oom_and_lost_err)?,
1381                })
1382            },
1383            Self::FencePool {
1384                last_completed,
1385                ref active,
1386                free: _,
1387            } => Self::check_active(device, last_completed, active),
1388        }
1389    }
1390
1391    /// Trim the internal state of this [`Fence`].
1392    ///
1393    /// This function has no externally visible effect, but you should call it
1394    /// periodically to keep this fence's resource consumption under control.
1395    ///
1396    /// For fences using the [`FencePool`] implementation, this function
1397    /// recycles fences that have been signaled. If you don't call this,
1398    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1399    /// time it's called.
1400    ///
1401    /// [`FencePool`]: Fence::FencePool
1402    /// [`Queue::submit`]: crate::Queue::submit
1403    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1404        match *self {
1405            Self::TimelineSemaphore(_) => {}
1406            Self::FencePool {
1407                ref mut last_completed,
1408                ref mut active,
1409                ref mut free,
1410            } => {
1411                let latest = Self::check_active(device, *last_completed, active)?;
1412                let base_free = free.len();
1413                for &(value, raw) in active.iter() {
1414                    if value <= latest {
1415                        free.push(raw);
1416                    }
1417                }
1418                if free.len() != base_free {
1419                    active.retain(|&(value, _)| value > latest);
1420                    unsafe { device.reset_fences(&free[base_free..]) }
1421                        .map_err(map_device_oom_err)?
1422                }
1423                *last_completed = latest;
1424            }
1425        }
1426        Ok(())
1427    }
1428}
1429
1430impl crate::Queue for Queue {
1431    type A = Api;
1432
1433    unsafe fn submit(
1434        &self,
1435        command_buffers: &[&CommandBuffer],
1436        surface_textures: &[&SurfaceTexture],
1437        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
1438    ) -> Result<(), crate::DeviceError> {
1439        let mut fence_raw = vk::Fence::null();
1440
1441        let mut wait_stage_masks = Vec::new();
1442        let mut wait_semaphores = Vec::new();
1443        let mut signal_semaphores = SemaphoreList::default();
1444
1445        // Double check that the same swapchain image isn't being given to us multiple times,
1446        // as that will deadlock when we try to lock them all.
1447        debug_assert!(
1448            {
1449                let mut check = HashSet::with_capacity(surface_textures.len());
1450                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
1451                for st in surface_textures {
1452                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
1453                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
1454                }
1455                check.len() == surface_textures.len() * 2
1456            },
1457            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
1458        );
1459
1460        let locked_swapchain_semaphores = surface_textures
1461            .iter()
1462            .map(|st| {
1463                let acquire = st
1464                    .acquire_semaphores
1465                    .try_lock()
1466                    .expect("Failed to lock surface acquire semaphore");
1467                let present = st
1468                    .present_semaphores
1469                    .try_lock()
1470                    .expect("Failed to lock surface present semaphore");
1471
1472                (acquire, present)
1473            })
1474            .collect::<Vec<_>>();
1475
1476        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
1477            acquire_semaphore.set_used_fence_value(signal_value);
1478
1479            // If we're the first submission to operate on this image, wait on
1480            // its acquire semaphore, to make sure the presentation engine is
1481            // done with it.
1482            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
1483                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1484                wait_semaphores.push(sem);
1485            }
1486
1487            // Get a semaphore to signal when we're done writing to this surface
1488            // image. Presentation of this image will wait for this.
1489            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
1490            signal_semaphores.push_binary(signal_semaphore);
1491        }
1492
1493        let mut guard = self.signal_semaphores.lock();
1494        if !guard.is_empty() {
1495            signal_semaphores.append(&mut guard);
1496        }
1497
1498        // In order for submissions to be strictly ordered, we encode a dependency between each submission
1499        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
1500        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
1501
1502        if let Some(sem) = semaphore_state.wait {
1503            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1504            wait_semaphores.push(sem);
1505        }
1506
1507        signal_semaphores.push_binary(semaphore_state.signal);
1508
1509        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
1510        signal_fence.maintain(&self.device.raw)?;
1511        match *signal_fence {
1512            Fence::TimelineSemaphore(raw) => {
1513                signal_semaphores.push_timeline(raw, signal_value);
1514            }
1515            Fence::FencePool {
1516                ref mut active,
1517                ref mut free,
1518                ..
1519            } => {
1520                fence_raw = match free.pop() {
1521                    Some(raw) => raw,
1522                    None => unsafe {
1523                        self.device
1524                            .raw
1525                            .create_fence(&vk::FenceCreateInfo::default(), None)
1526                            .map_err(map_host_device_oom_err)?
1527                    },
1528                };
1529                active.push((signal_value, fence_raw));
1530            }
1531        }
1532
1533        let vk_cmd_buffers = command_buffers
1534            .iter()
1535            .map(|cmd| cmd.raw)
1536            .collect::<Vec<_>>();
1537
1538        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
1539
1540        vk_info = vk_info
1541            .wait_semaphores(&wait_semaphores)
1542            .wait_dst_stage_mask(&wait_stage_masks);
1543
1544        let mut vk_timeline_info = mem::MaybeUninit::uninit();
1545        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);
1546
1547        profiling::scope!("vkQueueSubmit");
1548        unsafe {
1549            self.device
1550                .raw
1551                .queue_submit(self.raw, &[vk_info], fence_raw)
1552                .map_err(map_host_device_oom_and_lost_err)?
1553        };
1554        Ok(())
1555    }
1556
1557    unsafe fn present(
1558        &self,
1559        surface: &Surface,
1560        texture: SurfaceTexture,
1561    ) -> Result<(), crate::SurfaceError> {
1562        let mut swapchain = surface.swapchain.write();
1563        let ssc = swapchain.as_mut().unwrap();
1564        let mut acquire_semaphore = texture.acquire_semaphores.lock();
1565        let mut present_semaphores = texture.present_semaphores.lock();
1566
1567        let wait_semaphores = present_semaphores.get_present_wait_semaphores();
1568
1569        // Reset the acquire and present semaphores internal state
1570        // to be ready for the next frame.
1571        //
1572        // We do this before the actual call to present to ensure that
1573        // even if this method errors and early outs, we have reset
1574        // the state for next frame.
1575        acquire_semaphore.end_semaphore_usage();
1576        present_semaphores.end_semaphore_usage();
1577
1578        drop(acquire_semaphore);
1579
1580        let swapchains = [ssc.raw];
1581        let image_indices = [texture.index];
1582        let vk_info = vk::PresentInfoKHR::default()
1583            .swapchains(&swapchains)
1584            .image_indices(&image_indices)
1585            .wait_semaphores(&wait_semaphores);
1586
1587        let mut display_timing;
1588        let present_times;
1589        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
1590            debug_assert!(
1591                ssc.device
1592                    .features
1593                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
1594                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
1595            );
1596            present_times = [present_time];
1597            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
1598            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
1599            vk_info.push_next(&mut display_timing)
1600        } else {
1601            vk_info
1602        };
1603
1604        let suboptimal = {
1605            profiling::scope!("vkQueuePresentKHR");
1606            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
1607                match error {
1608                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
1609                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
1610                    // We don't use VK_EXT_full_screen_exclusive
1611                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
1612                    _ => map_host_device_oom_and_lost_err(error).into(),
1613                }
1614            })?
1615        };
1616        if suboptimal {
1617            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
1618            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
1619            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
1620            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
1621            #[cfg(not(target_os = "android"))]
1622            log::warn!("Suboptimal present of frame {}", texture.index);
1623        }
1624        Ok(())
1625    }
1626
    /// Returns the timestamp period recorded on this queue's device.
    ///
    /// The value is read straight from `self.device.timestamp_period`; nothing is
    /// queried from the driver here.
    ///
    /// NOTE(review): presumably nanoseconds per timestamp tick, as in
    /// `VkPhysicalDeviceLimits::timestampPeriod` — confirm where the field is populated.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
1630}
1631
1632impl Queue {
1633    pub fn raw_device(&self) -> &ash::Device {
1634        &self.device.raw
1635    }
1636
1637    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1638        let mut guard = self.signal_semaphores.lock();
1639        if let Some(value) = semaphore_value {
1640            guard.push_timeline(semaphore, value);
1641        } else {
1642            guard.push_binary(semaphore);
1643        }
1644    }
1645}
1646
1647/// Maps
1648///
1649/// - VK_ERROR_OUT_OF_HOST_MEMORY
1650/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1651fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1652    match err {
1653        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1654            get_oom_err(err)
1655        }
1656        e => get_unexpected_err(e),
1657    }
1658}
1659
1660/// Maps
1661///
1662/// - VK_ERROR_OUT_OF_HOST_MEMORY
1663/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1664/// - VK_ERROR_DEVICE_LOST
1665fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1666    match err {
1667        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1668        other => map_host_device_oom_err(other),
1669    }
1670}
1671
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// This is a plain delegation to [`map_host_device_oom_err`]:
/// `VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR` gets no dedicated mapping and is
/// therefore treated like any other unrecognized code.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1682
1683/// Maps
1684///
1685/// - VK_ERROR_OUT_OF_HOST_MEMORY
1686fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1687    match err {
1688        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1689        e => get_unexpected_err(e),
1690    }
1691}
1692
1693/// Maps
1694///
1695/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1696fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1697    match err {
1698        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1699        e => get_unexpected_err(e),
1700    }
1701}
1702
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// This is a plain delegation to [`map_host_oom_err`]:
/// `VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR` gets no dedicated mapping and is
/// therefore treated like any other unrecognized code.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1712
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// This is a plain delegation to [`map_host_device_oom_err`]: the last two codes
/// get no dedicated mapping and are therefore treated like any other unrecognized
/// code.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1726
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
///
/// `_err` is only read when the feature is enabled (it is included in the panic
/// message); the leading underscore avoids an unused-variable warning otherwise.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled, the `panic!` above makes this expression unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1736
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The `_err` argument is currently unused; the result does not depend on it.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1741
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled, the `panic!` above makes this expression unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1751
/// Plain-old-data record describing a single TLAS instance.
///
/// `#[repr(C)]` fixes the field order, and the `Pod`/`Zeroable` derives allow the
/// struct to be reinterpreted as raw bytes. The fields add up to 64 bytes
/// (48 + 4 + 4 + 8).
///
/// NOTE(review): the field names and sizes look like they mirror
/// `VkAccelerationStructureInstanceKHR` — confirm against the code that fills this in.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Twelve floats; presumably a row-major 3x4 transform matrix — TODO confirm.
    transform: [f32; 12],
    /// Custom data and mask packed into a single `u32`; the bit split is not
    /// visible from this definition.
    custom_data_and_mask: u32,
    /// Shader binding table record offset and flags packed into a single `u32`;
    /// the bit split is not visible from this definition.
    shader_binding_table_record_offset_and_flags: u32,
    /// 64-bit reference to the acceleration structure used by this instance.
    acceleration_structure_reference: u64,
}
1760
/// Arguments to the [`CreateDeviceCallback`].
///
/// Lifetimes:
/// - `'arg`: the mutable borrows handed to the callback.
/// - `'pnext`: the lifetime carried by the Vulkan create-info structures.
/// - `'this`: the lifetime of the callback itself; the `'this: 'pnext` bound
///   requires it to outlive `'pnext`.
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1782
/// Callback to allow changing the Vulkan device creation parameters.
///
/// The callback is an `FnOnce` that receives a [`CreateDeviceCallbackArgs`] by value.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to something the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1792
/// Arguments to the [`CreateInstanceCallback`].
///
/// Lifetimes:
/// - `'arg`: the borrows handed to the callback.
/// - `'pnext`: the lifetime carried by the Vulkan create-info structure.
/// - `'this`: the lifetime of the callback itself; the `'this: 'pnext` bound
///   requires it to outlive `'pnext`.
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1812
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// The callback is an `FnOnce` that receives a [`CreateInstanceCallbackArgs`] by value.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to something the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;