wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement using a combination of the following approaches:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but they are removed when
any of the image views they refer to gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
52const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
53
/// Marker type implementing [`crate::Api`] for the Vulkan backend.
#[derive(Clone, Debug)]
pub struct Api;
56
// Wires the backend-agnostic `crate::Api` interface onto this module's
// concrete Vulkan types.
impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
86
// Implements the crate's `Dyn*` resource traits for every backend type,
// listed alphabetically. NOTE(review): see `impl_dyn_resource!`'s definition
// for exactly what it generates.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
111
/// State for the `VK_EXT_debug_utils` messenger installed on the instance.
struct DebugUtils {
    /// Loaded instance-level function pointers for `VK_EXT_debug_utils`.
    extension: ext::debug_utils::Instance,
    /// The messenger registered with the Vulkan instance.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
124
/// Parameters used to create the [`DebugUtils`] messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to `instance::debug_utils_messenger_callback`.
    callback_data: Box<DebugUtilsMessengerUserData>,
}
130
#[derive(Debug)]
/// Properties of the validation layer that the debug-utils messenger
/// callback needs in order to apply layer-specific workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
141
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
155
/// Instance state shared between the [`Instance`] and everything created from it.
pub struct InstanceShared {
    /// The raw `ash` instance handle.
    raw: ash::Instance,
    /// Instance extensions that were enabled at creation.
    extensions: Vec<&'static CStr>,
    // NOTE(review): presumably present when `raw` is externally owned so we
    // must not destroy it — mirrors `DeviceShared::drop`'s use; confirm in instance.rs.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if `VK_EXT_debug_utils` is active.
    debug_utils: Option<DebugUtils>,
    /// Loaded `VK_KHR_get_physical_device_properties2` functions, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry point the instance was loaded through.
    entry: ash::Entry,
    // NOTE(review): looks like a flag for the NVIDIA Optimus driver being
    // detected — confirm where it is set in instance.rs.
    has_nv_optimus: bool,
    // NOTE(review): presumably the Android SDK level of the host OS — confirm.
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
175
/// The Vulkan [`crate::Api`] instance.
pub struct Instance {
    /// State shared with adapters, devices, and surfaces created from this instance.
    shared: Arc<InstanceShared>,
}
179
/// Semaphore used to acquire a swapchain image.
///
/// One of these exists per slot of [`Swapchain::acquire_semaphores`].
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
228
229impl SwapchainAcquireSemaphore {
230    fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
231        Ok(Self {
232            acquire: device
233                .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
234            should_wait_for_acquire: true,
235            previously_used_submission_index: 0,
236        })
237    }
238
239    /// Sets the fence value which the next acquire will wait for. This prevents
240    /// the semaphore from being used while the previous submission is still in flight.
241    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
242        self.previously_used_submission_index = value;
243    }
244
245    /// Return the semaphore that commands drawing to this image should wait for, if any.
246    ///
247    /// This only returns `Some` once per acquisition; see
248    /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
249    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
250        if self.should_wait_for_acquire {
251            self.should_wait_for_acquire = false;
252            Some(self.acquire)
253        } else {
254            None
255        }
256    }
257
258    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
259    /// so reset internal state to be ready for the next frame.
260    fn end_semaphore_usage(&mut self) {
261        // Reset the acquire semaphore, so that the next time we acquire this
262        // image, we can wait for it again.
263        self.should_wait_for_acquire = true;
264    }
265
266    unsafe fn destroy(&self, device: &ash::Device) {
267        unsafe {
268            device.destroy_semaphore(self.acquire, None);
269        }
270    }
271}
272
/// Pool of semaphores ordering submissions that draw to a swapchain image
/// before the presentation of that image.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Index of the swapchain image this semaphore set is used for
    /// (also used in the semaphores' debug labels).
    frame_index: usize,
}
319
320impl SwapchainPresentSemaphores {
321    pub fn new(frame_index: usize) -> Self {
322        Self {
323            present: Vec::new(),
324            present_index: 0,
325            frame_index,
326        }
327    }
328
329    /// Return the semaphore that the next submission that writes to this image should
330    /// signal when it's done.
331    ///
332    /// See [`SwapchainPresentSemaphores::present`] for details.
333    fn get_submit_signal_semaphore(
334        &mut self,
335        device: &DeviceShared,
336    ) -> Result<vk::Semaphore, crate::DeviceError> {
337        // Try to recycle a semaphore we created for a previous presentation.
338        let sem = match self.present.get(self.present_index) {
339            Some(sem) => *sem,
340            None => {
341                let sem = device.new_binary_semaphore(&format!(
342                    "SwapchainImageSemaphore: Image {} present semaphore {}",
343                    self.frame_index, self.present_index
344                ))?;
345                self.present.push(sem);
346                sem
347            }
348        };
349
350        self.present_index += 1;
351
352        Ok(sem)
353    }
354
355    /// Indicates the cpu-side usage of this semaphore has finished for the frame,
356    /// so reset internal state to be ready for the next frame.
357    fn end_semaphore_usage(&mut self) {
358        // Reset the index to 0, so that the next time we get a semaphore, we
359        // start from the beginning of the list.
360        self.present_index = 0;
361    }
362
363    /// Return the semaphores that a presentation of this image should wait on.
364    ///
365    /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
366    /// ends this image's acquisition should wait for. See
367    /// [`SwapchainPresentSemaphores::present`] for details.
368    ///
369    /// Reset `self` to be ready for the next acquisition cycle.
370    ///
371    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
372    fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
373        self.present[0..self.present_index].to_vec()
374    }
375
376    unsafe fn destroy(&self, device: &ash::Device) {
377        unsafe {
378            for sem in &self.present {
379                device.destroy_semaphore(*sem, None);
380            }
381        }
382    }
383}
384
/// A configured swapchain and its per-image synchronization state.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Loaded `VK_KHR_swapchain` device functions.
    functor: khr::swapchain::Device,
    /// The device this swapchain was created on.
    device: Arc<DeviceShared>,
    /// The swapchain's images.
    images: Vec<vk::Image>,
    /// The configuration this swapchain was created with.
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
437
438impl Swapchain {
439    /// Mark the current frame finished, advancing to the next acquire semaphore.
440    fn advance_acquire_semaphore(&mut self) {
441        let semaphore_count = self.acquire_semaphores.len();
442        self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
443    }
444
445    /// Get the next acquire semaphore that should be used with this swapchain.
446    fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
447        self.acquire_semaphores[self.next_acquire_index].clone()
448    }
449
450    /// Get the set of present semaphores that should be used with the given image index.
451    fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
452        self.present_semaphores[index as usize].clone()
453    }
454}
455
/// A presentable Vulkan surface, plus the swapchain currently configured on it.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded `VK_KHR_surface` instance functions.
    functor: khr::surface::Instance,
    /// The instance this surface was created from.
    instance: Arc<InstanceShared>,
    /// The active swapchain; `None` until the surface is configured.
    swapchain: RwLock<Option<Swapchain>>,
}
462
463impl Surface {
464    pub unsafe fn raw_handle(&self) -> vk::SurfaceKHR {
465        self.raw
466    }
467
468    /// Get the raw Vulkan swapchain associated with this surface.
469    ///
470    /// Returns [`None`] if the surface is not configured.
471    pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
472        let read = self.swapchain.read();
473        read.as_ref().map(|it| it.raw)
474    }
475
476    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
477    /// using [VK_GOOGLE_display_timing].
478    ///
479    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
480    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
481    ///
482    /// This can also be used to add a "not before" timestamp to the presentation.
483    ///
484    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
485    ///
486    /// # Panics
487    ///
488    /// - If the surface hasn't been configured.
489    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
490    ///
491    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
492    #[track_caller]
493    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
494        let mut swapchain = self.swapchain.write();
495        let swapchain = swapchain
496            .as_mut()
497            .expect("Surface should have been configured");
498        let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
499        if swapchain.device.features.contains(features) {
500            swapchain.next_present_time = Some(present_timing);
501        } else {
502            // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
503            panic!(
504                concat!(
505                    "Tried to set display timing properties ",
506                    "without the corresponding feature ({:?}) enabled."
507                ),
508                features
509            );
510        }
511    }
512}
513
/// A swapchain image acquired for presentation, with its sync state.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// The swapchain image index — NOTE(review): presumably the index returned
    /// by `vkAcquireNextImageKHR`; confirm at the acquire call site.
    index: u32,
    /// The wrapped swapchain image.
    texture: Texture,
    /// Semaphore waited on by the first submission that uses this image.
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Semaphores signalled by submissions and waited on by the present call.
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
521
impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used anywhere a `Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, for type-erased (`dyn`) texture users.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
535
/// The Vulkan [`crate::Api`] adapter, wrapping a physical device.
pub struct Adapter {
    /// The raw physical device handle.
    raw: vk::PhysicalDevice,
    /// The instance this adapter was enumerated from.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    // NOTE(review): presumably the memory property flags this backend
    // understands, used to filter memory types — confirm in adapter.rs.
    known_memory_flags: vk::MemoryPropertyFlags,
    /// Physical device properties gathered by `adapter.rs`.
    phd_capabilities: adapter::PhysicalDeviceProperties,
    /// Physical device features gathered by `adapter.rs`.
    phd_features: PhysicalDeviceFeatures,
    /// Downlevel flags exposed for this adapter.
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
}
547
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// A device extension's loaded functions, or a marker that the extension was
/// promoted to core Vulkan.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
555
/// Loaded function pointers for optional device extensions
/// (`None` when the extension is unavailable).
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// `Promoted` when core Vulkan provides timeline semaphores; see [`ExtensionFn`].
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
563
/// Function pointers for the extensions ray tracing requires.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
568
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    // NOTE(review): presumably whether `VkImageViewUsageCreateInfo` can be
    // used to restrict a view's usage — confirm in adapter.rs/device.rs.
    image_view_usage: bool,
    /// True if timeline semaphores are available; they are then used 1:1 with
    /// wgpu-hal fences (see the module docs on fences).
    timeline_semaphores: bool,
    // Depth/stencil format support flags. NOTE(review): semantics inferred
    // from the names — confirm against the format queries in adapter.rs.
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    // NOTE(review): presumably `nonCoherentAtomSize - 1`, used to align
    // non-coherent memory maps — confirm.
    non_coherent_map_mask: wgt::BufferAddress,
    // NOTE(review): presumably the `multiDrawIndirect` device feature — confirm.
    multi_draw_indirect: bool,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// True if this adapter advertises the `robustImageAccess` feature
    /// (the image analogue of [`Self::robust_buffer_access`]).
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// True if `VK_EXT_robustness2`'s `robustImageAccess2` feature is supported.
    robust_image_access2: bool,
    // NOTE(review): presumably whether the driver zero-initializes workgroup
    // memory, so Naga need not — confirm.
    zero_initialize_workgroup_memory: bool,
    /// Whether `VkImageFormatListCreateInfo` (`VK_KHR_image_format_list`) is available.
    image_format_list: bool,
    // NOTE(review): presumably `maxSamplerAllocationCount`, enforced by the
    // sampler cache — confirm in sampler.rs.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
640
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// Driver- and vendor-specific workarounds, detected per adapter and
    /// applied by the internal code paths.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
674
/// Hashable description of a single render-pass attachment, used as part of
/// [`RenderPassKey`] for the device's render-pass cache.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
681
682impl AttachmentKey {
683    /// Returns an attachment key for a compatible attachment.
684    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
685        Self {
686            format,
687            layout,
688            ops: crate::AttachmentOps::all(),
689        }
690    }
691}
692
/// Cache key for a color attachment and its optional resolve attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}
698
/// Cache key for a depth/stencil attachment, with its own stencil ops.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}
704
/// Key under which render passes are cached on the device
/// (see `DeviceShared::render_passes` and the module docs).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
712
/// Device state shared between the [`Device`], [`Queue`], and resources.
struct DeviceShared {
    /// The raw `ash` device handle.
    raw: ash::Device,
    /// The queue family index the device's queue was created from.
    family_index: u32,
    /// The queue index within the family.
    queue_index: u32,
    /// The raw queue handle.
    raw_queue: vk::Queue,
    // NOTE(review): presumably present when `raw` is externally owned;
    // `Drop` below skips `destroy_device` when this is `Some`.
    drop_guard: Option<crate::DropGuard>,
    /// The instance this device was created from.
    instance: Arc<InstanceShared>,
    /// The physical device this device was created on.
    physical_device: vk::PhysicalDevice,
    /// Device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    /// Loaded optional extension functions; see [`DeviceExtensionFunctions`].
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    // NOTE(review): presumably used to validate externally supplied pipeline
    // caches — confirm in device.rs.
    pipeline_cache_validation_key: [u8; 16],
    /// Nanoseconds per timestamp-query tick, from device limits.
    timestamp_period: f32,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
    /// The features the device was requested with.
    features: wgt::Features,
    /// Render passes cached on the device and kept forever (see module docs).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    /// Cache of deduplicated samplers.
    sampler_cache: Mutex<sampler::SamplerCache>,
    /// Counter of live memory allocations, for internal diagnostics.
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted until
    /// the device is destroyed, if the implementation of Vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
741
742impl Drop for DeviceShared {
743    fn drop(&mut self) {
744        for &raw in self.render_passes.lock().values() {
745            unsafe { self.raw.destroy_render_pass(raw, None) };
746        }
747        if self.drop_guard.is_none() {
748            unsafe { self.raw.destroy_device(None) };
749        }
750    }
751}
752
/// The Vulkan [`crate::Api`] device.
pub struct Device {
    /// State shared with the queue and resources created from this device.
    shared: Arc<DeviceShared>,
    /// GPU memory allocator.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Descriptor pool/set allocator.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    // NOTE(review): presumably a bitmask of memory type indices usable through
    // `ash` — confirm in device.rs.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend when translating shaders.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
764
765impl Drop for Device {
766    fn drop(&mut self) {
767        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
768        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
769    }
770}
771
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` is used by `advance` to snapshot the current wait/signal pair.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
805
806impl RelaySemaphores {
807    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
808        Ok(Self {
809            wait: None,
810            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
811        })
812    }
813
814    /// Advances the semaphores, returning the semaphores that should be used for a submission.
815    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
816        let old = self.clone();
817
818        // Build the state for the next submission.
819        match self.wait {
820            None => {
821                // The `old` values describe the first submission to this queue.
822                // The second submission should wait on `old.signal`, and then
823                // signal a new semaphore which we'll create now.
824                self.wait = Some(old.signal);
825                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
826            }
827            Some(ref mut wait) => {
828                // What this submission signals, the next should wait.
829                mem::swap(wait, &mut self.signal);
830            }
831        };
832
833        Ok(old)
834    }
835
836    /// Destroys the semaphores.
837    unsafe fn destroy(&self, device: &ash::Device) {
838        unsafe {
839            if let Some(wait) = self.wait {
840                device.destroy_semaphore(wait, None);
841            }
842            device.destroy_semaphore(self.signal, None);
843        }
844    }
845}
846
pub struct Queue {
    raw: vk::Queue,
    /// Device-level entry points for the swapchain extension, used for presentation.
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// Index of the queue family this queue belongs to.
    family_index: u32,
    /// Semaphores forcing submissions to this queue to run in order; see
    /// [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores to signal on the next submission, queued via
    /// [`Queue::add_signal_semaphore`].
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        // SAFETY: the relay semaphores were created from `self.device.raw`,
        // and by the time the queue is dropped they must no longer be in use
        // by pending GPU work.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// How a [`Buffer`]'s memory is owned.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// Memory allocated and owned by our `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// Caller-provided memory range; see [`Buffer::from_raw_managed`].
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
876impl BufferMemoryBacking {
877    fn memory(&self) -> &vk::DeviceMemory {
878        match self {
879            Self::Managed(m) => m.memory(),
880            Self::VulkanMemory { memory, .. } => memory,
881        }
882    }
883    fn offset(&self) -> u64 {
884        match self {
885            Self::Managed(m) => m.offset(),
886            Self::VulkanMemory { offset, .. } => *offset,
887        }
888    }
889    fn size(&self) -> u64 {
890        match self {
891            Self::Managed(m) => m.size(),
892            Self::VulkanMemory { size, .. } => *size,
893        }
894    }
895}
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory; `None` when the buffer was imported via
    /// [`Buffer::from_raw`] and its memory is managed by the caller.
    block: Option<Mutex<BufferMemoryBacking>>,
}
impl Buffer {
    /// Wraps a raw Vulkan buffer whose memory remains owned by the caller.
    ///
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }
    /// Wraps a raw Vulkan buffer together with the memory range backing it.
    ///
    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}
932
impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// Buffer that stores the acceleration structure data.
    buffer: vk::Buffer,
    /// Memory block backing `buffer`.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Query pool for reading back the compacted size, when present.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
944
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// Present when the raw image handle is owned externally; see `crate::DropGuard`.
    drop_guard: Option<crate::DropGuard>,
    /// Imported external memory bound to this image, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory block backing the image when allocated by our allocator.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Program-lifetime-unique identity for this image; see
    /// `DeviceShared::texture_identity_factory`.
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// Returns the raw Vulkan image handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// Returns the imported external memory bound to this image, if any.
    ///
    /// # Safety
    ///
    /// - The external memory must not be manually freed
    pub unsafe fn external_memory(&self) -> Option<vk::DeviceMemory> {
        self.external_memory
    }
}
973
#[derive(Debug)]
pub struct TextureView {
    /// The raw image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the underlying image; see `DeviceShared::texture_identity_factory`.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view; see `DeviceShared::texture_view_identity_factory`.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// Returns the raw Vulkan image view handle.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}
1005
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// Creation parameters, retained after creation.
    /// NOTE(review): presumably used for sampler-cache lookups — confirm in the `sampler` module.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The (possibly remapped) binding index.
    binding: u32,
    /// Size of the binding array, if this binding is an array.
    binding_array_size: Option<NonZeroU32>,
}
1022
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Descriptor counts needed to allocate a set with this layout.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding map for naga's SPIR-V backend, built from the bind group layouts.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    /// The allocated descriptor set holding this group's bindings.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1051
1052/// Miscellaneous allocation recycling pool for `CommandAllocator`.
1053#[derive(Default)]
1054struct Temp {
1055    marker: Vec<u8>,
1056    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
1057    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
1058}
1059
1060impl Temp {
1061    fn clear(&mut self) {
1062        self.marker.clear();
1063        self.buffer_barriers.clear();
1064        self.image_barriers.clear();
1065    }
1066
1067    fn make_c_str(&mut self, name: &str) -> &CStr {
1068        self.marker.clear();
1069        self.marker.extend_from_slice(name.as_bytes());
1070        self.marker.push(0);
1071        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1072    }
1073}
1074
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    /// Fallback counter for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// Monotonically increasing counter for the next ID to hand out.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to the resource type `T` without storing one.
    _phantom: PhantomData<T>,
}
1086
impl<T> ResourceIdentityFactory<T> {
    /// Creates a new factory whose IDs start at 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        // Relaxed suffices: each caller only needs a distinct value, not any
        // ordering relative to other memory operations.
        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
1120
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// The unique value handed out by [`ResourceIdentityFactory::next`].
    id: u64,
    /// Ties the identity to the resource type `T` without storing one.
    _phantom: PhantomData<T>,
}
1130
/// Hash-map key for the framebuffer cache; see `CommandEncoder::framebuffers`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass the framebuffer is created for.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}
1144
1145impl FramebufferKey {
1146    fn push_view(&mut self, view: IdentifiedTextureView) {
1147        self.attachment_identities.push(view.identity);
1148        self.attachment_views.push(view.raw);
1149    }
1150}
1151
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

/// Key for the temporary texture-view cache; see
/// `CommandEncoder::temp_texture_views`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1170
pub struct CommandEncoder {
    /// The command pool that `active`, `free`, and `discarded` buffers are
    /// allocated from; destroyed (freeing them all) on drop.
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers created by this encoder; destroyed when it is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views created by this encoder; destroyed when it is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared HAL counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1212
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // The cached framebuffers and temporary image views are owned by this
        // encoder, so they are destroyed along with it.
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}
1243
impl CommandEncoder {
    /// Returns the active command buffer handle (null when the encoder is not
    /// recording; see [`CommandEncoder::active`]).
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1252
1253impl fmt::Debug for CommandEncoder {
1254    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1255        f.debug_struct("CommandEncoder")
1256            .field("raw", &self.raw)
1257            .finish()
1258    }
1259}
1260
#[derive(Debug)]
pub struct CommandBuffer {
    /// Raw Vulkan command buffer handle.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-created Vulkan shader module.
    Raw(vk::ShaderModule),
    /// Naga IR not yet compiled to SPIR-V
    /// (presumably compiled at pipeline creation — confirm in the `device` module).
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1279
/// A Vulkan graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

/// A Vulkan compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

/// A Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

/// A Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1307
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1358
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                // `get_fence_status` returns `true` once the fence is signaled.
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            // The semaphore's counter value *is* the fence value; read it via
            // the extension entry points or the promoted core function.
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Remember where the newly recycled fences begin in `free`,
                // so we can reset just those in a single call below.
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1452
1453impl crate::Queue for Queue {
1454    type A = Api;
1455
1456    unsafe fn submit(
1457        &self,
1458        command_buffers: &[&CommandBuffer],
1459        surface_textures: &[&SurfaceTexture],
1460        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
1461    ) -> Result<(), crate::DeviceError> {
1462        let mut fence_raw = vk::Fence::null();
1463
1464        let mut wait_stage_masks = Vec::new();
1465        let mut wait_semaphores = Vec::new();
1466        let mut signal_semaphores = SemaphoreList::default();
1467
1468        // Double check that the same swapchain image isn't being given to us multiple times,
1469        // as that will deadlock when we try to lock them all.
1470        debug_assert!(
1471            {
1472                let mut check = HashSet::with_capacity(surface_textures.len());
1473                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
1474                for st in surface_textures {
1475                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
1476                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
1477                }
1478                check.len() == surface_textures.len() * 2
1479            },
1480            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
1481        );
1482
1483        let locked_swapchain_semaphores = surface_textures
1484            .iter()
1485            .map(|st| {
1486                let acquire = st
1487                    .acquire_semaphores
1488                    .try_lock()
1489                    .expect("Failed to lock surface acquire semaphore");
1490                let present = st
1491                    .present_semaphores
1492                    .try_lock()
1493                    .expect("Failed to lock surface present semaphore");
1494
1495                (acquire, present)
1496            })
1497            .collect::<Vec<_>>();
1498
1499        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
1500            acquire_semaphore.set_used_fence_value(signal_value);
1501
1502            // If we're the first submission to operate on this image, wait on
1503            // its acquire semaphore, to make sure the presentation engine is
1504            // done with it.
1505            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
1506                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1507                wait_semaphores.push(sem);
1508            }
1509
1510            // Get a semaphore to signal when we're done writing to this surface
1511            // image. Presentation of this image will wait for this.
1512            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
1513            signal_semaphores.push_binary(signal_semaphore);
1514        }
1515
1516        let mut guard = self.signal_semaphores.lock();
1517        if !guard.is_empty() {
1518            signal_semaphores.append(&mut guard);
1519        }
1520
1521        // In order for submissions to be strictly ordered, we encode a dependency between each submission
1522        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
1523        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
1524
1525        if let Some(sem) = semaphore_state.wait {
1526            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
1527            wait_semaphores.push(sem);
1528        }
1529
1530        signal_semaphores.push_binary(semaphore_state.signal);
1531
1532        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
1533        signal_fence.maintain(&self.device.raw)?;
1534        match *signal_fence {
1535            Fence::TimelineSemaphore(raw) => {
1536                signal_semaphores.push_timeline(raw, signal_value);
1537            }
1538            Fence::FencePool {
1539                ref mut active,
1540                ref mut free,
1541                ..
1542            } => {
1543                fence_raw = match free.pop() {
1544                    Some(raw) => raw,
1545                    None => unsafe {
1546                        self.device
1547                            .raw
1548                            .create_fence(&vk::FenceCreateInfo::default(), None)
1549                            .map_err(map_host_device_oom_err)?
1550                    },
1551                };
1552                active.push((signal_value, fence_raw));
1553            }
1554        }
1555
1556        let vk_cmd_buffers = command_buffers
1557            .iter()
1558            .map(|cmd| cmd.raw)
1559            .collect::<Vec<_>>();
1560
1561        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
1562
1563        vk_info = vk_info
1564            .wait_semaphores(&wait_semaphores)
1565            .wait_dst_stage_mask(&wait_stage_masks);
1566
1567        let mut vk_timeline_info = mem::MaybeUninit::uninit();
1568        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);
1569
1570        profiling::scope!("vkQueueSubmit");
1571        unsafe {
1572            self.device
1573                .raw
1574                .queue_submit(self.raw, &[vk_info], fence_raw)
1575                .map_err(map_host_device_oom_and_lost_err)?
1576        };
1577        Ok(())
1578    }
1579
1580    unsafe fn present(
1581        &self,
1582        surface: &Surface,
1583        texture: SurfaceTexture,
1584    ) -> Result<(), crate::SurfaceError> {
1585        let mut swapchain = surface.swapchain.write();
1586        let ssc = swapchain.as_mut().unwrap();
1587        let mut acquire_semaphore = texture.acquire_semaphores.lock();
1588        let mut present_semaphores = texture.present_semaphores.lock();
1589
1590        let wait_semaphores = present_semaphores.get_present_wait_semaphores();
1591
1592        // Reset the acquire and present semaphores internal state
1593        // to be ready for the next frame.
1594        //
1595        // We do this before the actual call to present to ensure that
1596        // even if this method errors and early outs, we have reset
1597        // the state for next frame.
1598        acquire_semaphore.end_semaphore_usage();
1599        present_semaphores.end_semaphore_usage();
1600
1601        drop(acquire_semaphore);
1602
1603        let swapchains = [ssc.raw];
1604        let image_indices = [texture.index];
1605        let vk_info = vk::PresentInfoKHR::default()
1606            .swapchains(&swapchains)
1607            .image_indices(&image_indices)
1608            .wait_semaphores(&wait_semaphores);
1609
1610        let mut display_timing;
1611        let present_times;
1612        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
1613            debug_assert!(
1614                ssc.device
1615                    .features
1616                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
1617                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
1618            );
1619            present_times = [present_time];
1620            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
1621            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
1622            vk_info.push_next(&mut display_timing)
1623        } else {
1624            vk_info
1625        };
1626
1627        let suboptimal = {
1628            profiling::scope!("vkQueuePresentKHR");
1629            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
1630                match error {
1631                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
1632                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
1633                    // We don't use VK_EXT_full_screen_exclusive
1634                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
1635                    _ => map_host_device_oom_and_lost_err(error).into(),
1636                }
1637            })?
1638        };
1639        if suboptimal {
1640            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
1641            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
1642            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
1643            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
1644            #[cfg(not(target_os = "android"))]
1645            log::warn!("Suboptimal present of frame {}", texture.index);
1646        }
1647        Ok(())
1648    }
1649
    /// Returns the number of nanoseconds each timestamp-query tick represents,
    /// as cached from the physical device limits at device creation.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
1653}
1654
1655impl Queue {
1656    pub fn raw_device(&self) -> &ash::Device {
1657        &self.device.raw
1658    }
1659
1660    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1661        let mut guard = self.signal_semaphores.lock();
1662        if let Some(value) = semaphore_value {
1663            guard.push_timeline(semaphore, value);
1664        } else {
1665            guard.push_binary(semaphore);
1666        }
1667    }
1668}
1669
1670/// Maps
1671///
1672/// - VK_ERROR_OUT_OF_HOST_MEMORY
1673/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1674fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1675    match err {
1676        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1677            get_oom_err(err)
1678        }
1679        e => get_unexpected_err(e),
1680    }
1681}
1682
1683/// Maps
1684///
1685/// - VK_ERROR_OUT_OF_HOST_MEMORY
1686/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1687/// - VK_ERROR_DEVICE_LOST
1688fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1689    match err {
1690        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1691        other => map_host_device_oom_err(other),
1692    }
1693}
1694
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot occur for our usage;
    // the plain host/device OOM mapping therefore suffices.
    map_host_device_oom_err(err)
}
1705
1706/// Maps
1707///
1708/// - VK_ERROR_OUT_OF_HOST_MEMORY
1709fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1710    match err {
1711        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1712        e => get_unexpected_err(e),
1713    }
1714}
1715
1716/// Maps
1717///
1718/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1719fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1720    match err {
1721        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1722        e => get_unexpected_err(e),
1723    }
1724}
1725
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot occur for our usage;
    // the plain host OOM mapping therefore suffices.
    map_host_oom_err(err)
}
1735
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control, so
    // VK_PIPELINE_COMPILE_REQUIRED_EXT cannot occur for our usage.
    // We don't use VK_NV_glsl_shader, so VK_ERROR_INVALID_SHADER_NV cannot
    // occur either. Host/device OOM handling covers the remaining codes.
    map_host_device_oom_err(err)
}
1749
1750/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1751/// feature flag is enabled.
1752fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1753    #[cfg(feature = "internal_error_panic")]
1754    panic!("Unexpected Vulkan error: {_err:?}");
1755
1756    #[allow(unreachable_code)]
1757    crate::DeviceError::Unexpected
1758}
1759
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The original Vulkan error code is accepted but currently unused: both
/// host and device OOM collapse into the same wgpu-hal error.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1764
1765/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1766/// feature flag is enabled.
1767fn get_lost_err() -> crate::DeviceError {
1768    #[cfg(feature = "device_lost_panic")]
1769    panic!("Device lost");
1770
1771    #[allow(unreachable_code)]
1772    crate::DeviceError::Lost
1773}
1774
/// Plain-bytes instance record written into TLAS instance buffers.
///
/// NOTE(review): the field layout appears to mirror Vulkan's
/// `VkAccelerationStructureInstanceKHR` (3x4 transform, 24-bit custom index
/// packed with an 8-bit mask, 24-bit SBT record offset packed with 8-bit
/// flags, 64-bit acceleration structure reference) — confirm against the
/// Vulkan spec before changing it.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    // 3x4 transform matrix stored as 12 consecutive floats.
    transform: [f32; 12],
    // Packed: instance custom index (low 24 bits) and visibility mask (high 8 bits).
    custom_data_and_mask: u32,
    // Packed: SBT record offset (low 24 bits) and instance flags (high 8 bits).
    shader_binding_table_record_offset_and_flags: u32,
    // Device address or handle identifying the referenced BLAS.
    acceleration_structure_reference: u64,
}
1783
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1805
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything that the instance or physical device does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1815
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1835
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info's extension list will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything that the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;