wgpu_hal/vulkan/mod.rs

/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement with a combination of the following approaches:
  - temporarily allocating `Vec` on the heap, where the overhead is permitted
  - growing temporary local storage

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If the Vulkan implementation supports imageless framebuffers,
the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
pub mod conv;
mod descriptor;
mod device;
mod drm;
mod instance;
mod sampler;
mod semaphore_list;
mod swapchain;

pub use adapter::PhysicalDeviceFeatures;

use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};

const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// Properties of the validation layer that the `DebugUtilsMessenger`
/// workarounds need to know about.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// its layer, so there's no reason to store the version.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

pub struct Surface {
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
    inner: Box<dyn swapchain::Surface>,
}

impl Surface {
    /// Returns the raw Vulkan surface handle.
    ///
    /// Returns `None` if the surface is a DXGI surface.
    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
        Some(
            self.inner
                .as_any()
                .downcast_ref::<swapchain::NativeSurface>()?
                .as_raw(),
        )
    }

    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured or if the swapchain
    /// is a DXGI swapchain.
    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        Some(
            read.as_ref()?
                .as_any()
                .downcast_ref::<swapchain::NativeSwapchain>()?
                .as_raw(),
        )
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the surface has been configured for a DXGI swapchain.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
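    ///
    /// A sketch of tagging the next present with an id (the field values here
    /// are illustrative, not prescriptive):
    ///
    /// ```ignore
    /// surface.set_next_present_time(vk::PresentTimeGOOGLE {
    ///     present_id: 1,            // id to correlate with past-presentation timing results
    ///     desired_present_time: 0,  // zero means no "not before" constraint
    /// });
    /// ```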
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        swapchain
            .as_mut()
            .expect("Surface should have been configured")
            .as_any_mut()
            .downcast_mut::<swapchain::NativeSwapchain>()
            .expect("Surface should have a native Vulkan swapchain")
            .set_next_present_time(present_timing);
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers
// should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
    #[cfg_attr(not(unix), allow(dead_code))]
    external_memory_fd: Option<khr::external_memory_fd::Device>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// The maximum instance index usable when drawing with multiview. This
    /// limit exists so we can panic before undefined behavior; the check is
    /// imperfect.
    ///
    /// To allow implementations to emulate multiview using instancing, if you
    /// want to draw `n` instances to a two-view (VR) target, the implementation
    /// must draw `2n` instances, and it can never draw more than `u32::MAX`
    /// instances. Therefore, some Vulkan implementations restrict the instance
    /// count when drawing multiview, which isn't usually a thing in WebGPU. We
    /// don't expose this limit because it's strange, i.e. it only occurs on
    /// certain Vulkan implementations when drawing more than roughly 128
    /// million instances. We still want to avoid undefined behavior in this
    /// situation, so we panic if the limit is violated.
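    ///
    /// A worked example (numbers purely illustrative): with two views emulated
    /// via instancing, a draw of 100 million instances consumes 200 million
    /// instance indices; if that exceeds this limit, we panic rather than risk
    /// undefined behavior.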
    multiview_instance_index_limit: u32,

    /// `BufferUsages::ACCELERATION_STRUCTURE_SCRATCH` allows usage as a scratch
    /// buffer. Vulkan has no way to specify this as a usage, so it maps to other
    /// usages, but those usages do not impose as high an alignment requirement
    /// as using the buffer as a scratch buffer when building acceleration
    /// structures does.
    scratch_buffer_alignment: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then Nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to `vkCmdFillBuffer` use offsets
        /// aligned to 16 bytes if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview_mask: Option<NonZeroU32>,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we cache framebuffers and do not delete them until the device
    /// is destroyed, if the Vulkan implementation re-uses handles we need some
    /// way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    empty_descriptor_set_layout: vk::DescriptorSetLayout,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        unsafe {
            self.raw
                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
        };
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    desc_allocator: Mutex<descriptor::DescriptorAllocator>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last; keep the `DeviceShared`
    // last to ensure that all resources that depend on it (like `mem_allocator`)
    // are destroyed before it.
    shared: Arc<DeviceShared>,
}

impl Drop for Device {
    fn drop(&mut self) {}
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
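///
/// A sketch of the alternation across submissions (semaphore names are
/// illustrative):
///
/// ```text
/// submission 1: waits on nothing, signals A
/// submission 2: waits on A,       signals B
/// submission 3: waits on B,       signals A
/// submission 4: waits on A,       signals B
/// ```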
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait on.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
    wait_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}

#[derive(Debug)]
enum BufferMemoryBacking {
    Managed(gpu_allocator::vulkan::Allocation),
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}

impl BufferMemoryBacking {
    fn memory(&self) -> vk::DeviceMemory {
        match self {
            Self::Managed(m) => unsafe { m.memory() },
            Self::VulkanMemory { memory, .. } => *memory,
        }
    }

    fn offset(&self) -> u64 {
        match self {
            Self::Managed(m) => m.offset(),
            Self::VulkanMemory { offset, .. } => *offset,
        }
    }

    fn size(&self) -> u64 {
        match self {
            Self::Managed(m) => m.size(),
            Self::VulkanMemory { size, .. } => *size,
        }
    }
}

/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
/// and therefore what cleanup is required when the buffer is destroyed.
#[derive(Debug)]
enum BufferOwnership {
    /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
    /// handle is destroyed and the memory is released.
    Managed(Mutex<BufferMemoryBacking>),
    /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
    /// by the caller. On cleanup only the buffer handle is destroyed.
    RawHandle,
    /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
    /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
    /// neither the handle nor the memory.
    External(crate::DropGuard),
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,

    // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
    ownership: BufferOwnership,
}

impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            ownership: BufferOwnership::RawHandle,
        }
    }

    /// # Safety
    /// - `vk_buffer` must outlive the returned `Buffer`.
    /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
    ///   The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
    /// - Externally imported buffers can't be mapped by `wgpu`.
    pub unsafe fn from_raw_externally_owned(
        vk_buffer: vk::Buffer,
        drop_callback: crate::DropCallback,
    ) -> Self {
        Self {
            raw: vk_buffer,
            ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
        }
    }

    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have
    ///   exclusive ownership over it, until the wgpu resource is dropped and the
    ///   wgpu-hal object is cleaned up.
    /// - Externally imported buffers can't be mapped by `wgpu`.
    /// - `offset` and `size` must be valid within the allocation of `memory`.
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }

    /// # Safety
    /// - The buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Buffer {
        self.raw
    }
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub enum TextureMemory {
    // shared memory in GPU allocator (owned by wgpu-hal)
    Allocation(gpu_allocator::vulkan::Allocation),

    // dedicated memory (owned by wgpu-hal)
    Dedicated(vk::DeviceMemory),

    // memory not owned by wgpu
    External,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_allocator::vulkan::Allocation` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    texture_identity: ResourceIdentity<vk::Image>,
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: descriptor::DescriptorCounts,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: descriptor::DescriptorSet,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
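///
/// A sketch of why this matters (handle values are illustrative): if the
/// driver destroys an image and then hands back the same handle value for a
/// newly created image, the two resources still receive distinct identities:
///
/// ```text
/// image A: raw = 0x1234, identity = 0
/// image B: raw = 0x1234, identity = 1   // recycled handle, new resource
/// ```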
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// which needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,

    current_pipeline_is_multiview: bool,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
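    ///
    /// A sketch of the effective value (numbers are illustrative):
    ///
    /// ```text
    /// last_completed = 3
    /// active = [(4, f4), (5, f5)]   // f4 signalled, f5 not yet
    /// effective value = max(3, 4) = 4
    /// ```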
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        let mut wait_guard = self.wait_semaphores.lock();
        if !wait_guard.is_empty() {
            wait_semaphores.append(&mut wait_guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}

impl Queue {
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
        } else {
            guard.push_signal(SemaphoreType::Binary(semaphore));
        }
    }

    /// Remove `semaphore` from the pending signal list if it is still present.
    ///
    /// Returns `true` if the semaphore was found and removed. If the submit
    /// already consumed it, this is a harmless no-op that returns `false`.
    pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
        self.signal_semaphores.lock().remove(semaphore)
    }

    /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
    ///
    /// `semaphore_value` selects the kind of payload the wait targets:
    ///
    /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
    /// - `None` - wait on a binary semaphore signal.
    ///
    /// `stage` is the pipeline stage at which the wait blocks downstream
    /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
    /// entire submission, or a more specific stage when only that stage
    /// reads the synchronised resource).
    pub fn add_wait_semaphore(
        &self,
        semaphore: vk::Semaphore,
        semaphore_value: Option<u64>,
        stage: vk::PipelineStageFlags,
    ) {
        let mut guard = self.wait_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
        } else {
            guard.push_wait(SemaphoreType::Binary(semaphore), stage);
        }
    }

    /// Remove `semaphore` from the pending wait list if it is still present.
    ///
    /// Returns `true` if the semaphore was found and removed. If the submit
    /// already consumed it, this is a no-op that returns `false`.
    pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
        self.wait_semaphores.lock().remove(semaphore)
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_FRAGMENTATION
fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}

#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}

/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the device does not support.
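///
/// A sketch of a conforming callback that enables one extra device extension
/// (the extension chosen here is purely illustrative):
///
/// ```ignore
/// let callback: Box<CreateDeviceCallback<'_>> = Box::new(|args| {
///     // Add to `extensions`, not to `create_info`, so it isn't overwritten.
///     args.extensions.push(ash::khr::external_memory_fd::NAME);
/// });
/// ```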
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;

/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
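///
/// A sketch of a conforming callback that enables one extra instance extension
/// (the extension chosen here is purely illustrative):
///
/// ```ignore
/// let callback: Box<CreateInstanceCallback<'_>> = Box::new(|args| {
///     args.extensions.push(ash::ext::debug_utils::NAME);
/// });
/// ```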
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;