wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
  - temporarily allocating a `Vec` on the heap, where the overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but they are removed when
any of the image views they reference gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod descriptor;
31mod device;
32mod drm;
33mod instance;
34mod sampler;
35mod semaphore_list;
36mod swapchain;
37
38pub use adapter::PhysicalDeviceFeatures;
39
40use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
41use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use bytemuck::{Pod, Zeroable};
46use hashbrown::HashSet;
47use parking_lot::{Mutex, RwLock};
48
49use naga::FastHashMap;
50use wgt::InternalCounter;
51
52use semaphore_list::SemaphoreList;
53
54use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
55
56const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
57
/// Zero-sized tag type that selects the Vulkan backend through its
/// [`crate::Api`] implementation.
#[derive(Debug, Clone)]
pub struct Api;
60
61impl crate::Api for Api {
62    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
63
64    type Instance = Instance;
65    type Surface = Surface;
66    type Adapter = Adapter;
67    type Device = Device;
68
69    type Queue = Queue;
70    type CommandEncoder = CommandEncoder;
71    type CommandBuffer = CommandBuffer;
72
73    type Buffer = Buffer;
74    type Texture = Texture;
75    type SurfaceTexture = SurfaceTexture;
76    type TextureView = TextureView;
77    type Sampler = Sampler;
78    type QuerySet = QuerySet;
79    type Fence = Fence;
80    type AccelerationStructure = AccelerationStructure;
81    type PipelineCache = PipelineCache;
82
83    type BindGroupLayout = BindGroupLayout;
84    type BindGroup = BindGroup;
85    type PipelineLayout = PipelineLayout;
86    type ShaderModule = ShaderModule;
87    type RenderPipeline = RenderPipeline;
88    type ComputePipeline = ComputePipeline;
89}
90
91crate::impl_dyn_resource!(
92    Adapter,
93    AccelerationStructure,
94    BindGroup,
95    BindGroupLayout,
96    Buffer,
97    CommandBuffer,
98    CommandEncoder,
99    ComputePipeline,
100    Device,
101    Fence,
102    Instance,
103    PipelineCache,
104    PipelineLayout,
105    QuerySet,
106    Queue,
107    RenderPipeline,
108    Sampler,
109    ShaderModule,
110    Surface,
111    SurfaceTexture,
112    Texture,
113    TextureView
114);
115
116struct DebugUtils {
117    extension: ext::debug_utils::Instance,
118    messenger: vk::DebugUtilsMessengerEXT,
119
120    /// Owning pointer to the debug messenger callback user data.
121    ///
122    /// `InstanceShared::drop` destroys the debug messenger before
123    /// dropping this, so the callback should never receive a dangling
124    /// user data pointer.
125    #[allow(dead_code)]
126    callback_data: Box<DebugUtilsMessengerUserData>,
127}
128
129pub struct DebugUtilsCreateInfo {
130    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
131    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
132    callback_data: Box<DebugUtilsMessengerUserData>,
133}
134
/// Validation-layer properties that the `DebugUtilsMessenger` callback needs
/// in order to apply its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Description of the validation layer, taken from `vk::LayerProperties`.
    layer_description: CString,

    /// Specification version of the validation layer, taken from
    /// `vk::LayerProperties`.
    layer_spec_version: u32,
}
145
146/// User data needed by `instance::debug_utils_messenger_callback`.
147///
148/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
149/// pointer refers to one of these values.
150#[derive(Debug)]
151pub struct DebugUtilsMessengerUserData {
152    /// The properties related to the validation layer, if present
153    validation_layer_properties: Option<ValidationLayerProperties>,
154
155    /// If the OBS layer is present. OBS never increments the version of their layer,
156    /// so there's no reason to have the version.
157    has_obs_layer: bool,
158}
159
160pub struct InstanceShared {
161    raw: ash::Instance,
162    extensions: Vec<&'static CStr>,
163    flags: wgt::InstanceFlags,
164    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
165    debug_utils: Option<DebugUtils>,
166    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
167    entry: ash::Entry,
168    has_nv_optimus: bool,
169    android_sdk_version: u32,
170    /// The instance API version.
171    ///
172    /// Which is the version of Vulkan supported for instance-level functionality.
173    ///
174    /// It is associated with a `VkInstance` and its children,
175    /// except for a `VkPhysicalDevice` and its children.
176    instance_api_version: u32,
177
178    // The `drop_guard` field must be the last field of this struct so it is dropped last.
179    // Do not add new fields after it.
180    drop_guard: Option<crate::DropGuard>,
181}
182
183pub struct Instance {
184    shared: Arc<InstanceShared>,
185}
186
187pub struct Surface {
188    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
189    inner: Box<dyn swapchain::Surface>,
190}
191
192impl Surface {
193    /// Returns the raw Vulkan surface handle.
194    ///
195    /// Returns `None` if the surface is a DXGI surface.
196    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
197        Some(
198            self.inner
199                .as_any()
200                .downcast_ref::<swapchain::NativeSurface>()?
201                .as_raw(),
202        )
203    }
204
205    /// Get the raw Vulkan swapchain associated with this surface.
206    ///
207    /// Returns [`None`] if the surface is not configured or if the swapchain
208    /// is a DXGI swapchain.
209    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
210        let read = self.swapchain.read();
211        Some(
212            read.as_ref()?
213                .as_any()
214                .downcast_ref::<swapchain::NativeSwapchain>()?
215                .as_raw(),
216        )
217    }
218
219    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
220    /// using [VK_GOOGLE_display_timing].
221    ///
222    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
223    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
224    ///
225    /// This can also be used to add a "not before" timestamp to the presentation.
226    ///
227    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
228    ///
229    /// # Panics
230    ///
231    /// - If the surface hasn't been configured.
232    /// - If the surface has been configured for a DXGI swapchain.
233    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
234    ///
235    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
236    #[track_caller]
237    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
238        let mut swapchain = self.swapchain.write();
239        swapchain
240            .as_mut()
241            .expect("Surface should have been configured")
242            .as_any_mut()
243            .downcast_mut::<swapchain::NativeSwapchain>()
244            .expect("Surface should have a native Vulkan swapchain")
245            .set_next_present_time(present_timing);
246    }
247}
248
249#[derive(Debug)]
250pub struct SurfaceTexture {
251    index: u32,
252    texture: Texture,
253    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
254}
255
256impl crate::DynSurfaceTexture for SurfaceTexture {}
257
258impl Borrow<Texture> for SurfaceTexture {
259    fn borrow(&self) -> &Texture {
260        &self.texture
261    }
262}
263
264impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
265    fn borrow(&self) -> &dyn crate::DynTexture {
266        &self.texture
267    }
268}
269
270pub struct Adapter {
271    raw: vk::PhysicalDevice,
272    instance: Arc<InstanceShared>,
273    //queue_families: Vec<vk::QueueFamilyProperties>,
274    known_memory_flags: vk::MemoryPropertyFlags,
275    phd_capabilities: adapter::PhysicalDeviceProperties,
276    phd_features: PhysicalDeviceFeatures,
277    downlevel_flags: wgt::DownlevelFlags,
278    private_caps: PrivateCapabilities,
279    workarounds: Workarounds,
280}
281
/// Where the entry points of a possibly-promoted extension come from.
// TODO: nothing prevents unifying the two cases, since the function pointers
// should all be the same; it is just unclear how to express that with `ash`.
enum ExtensionFn<T> {
    /// Function pointer table loaded for the extension.
    Extension(T),
    /// The extension has been promoted into a core Vulkan version, so the
    /// functions on `ash`'s `DeviceV1_x` traits should be used instead.
    Promoted,
}
289
290struct DeviceExtensionFunctions {
291    debug_utils: Option<ext::debug_utils::Device>,
292    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
293    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
294    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
295    mesh_shading: Option<ext::mesh_shader::Device>,
296    #[cfg_attr(not(unix), allow(dead_code))]
297    external_memory_fd: Option<khr::external_memory_fd::Device>,
298}
299
300struct RayTracingDeviceExtensionFunctions {
301    acceleration_structure: khr::acceleration_structure::Device,
302    buffer_device_address: khr::buffer_device_address::Device,
303}
304
305/// Set of internal capabilities, which don't show up in the exposed
306/// device geometry, but affect the code paths taken internally.
307#[derive(Clone, Debug)]
308struct PrivateCapabilities {
309    image_view_usage: bool,
310    timeline_semaphores: bool,
311    texture_d24: bool,
312    texture_d24_s8: bool,
313    texture_s8: bool,
314    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
315    can_present: bool,
316    non_coherent_map_mask: wgt::BufferAddress,
317    multi_draw_indirect: bool,
318    max_draw_indirect_count: u32,
319
320    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
321    ///
322    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
323    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
324    /// a given bindgroup binding outside that binding's [accessible
325    /// region][ar]. Enabling `robustBufferAccess` does ensure that
326    /// out-of-bounds reads and writes are not undefined behavior (that's good),
327    /// but still permits out-of-bounds reads to return data from anywhere
328    /// within the buffer, not just the accessible region.
329    ///
330    /// [ar]: ../struct.BufferBinding.html#accessible-region
331    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
332    robust_buffer_access: bool,
333
334    robust_image_access: bool,
335
336    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
337    /// [`robustBufferAccess2`] feature.
338    ///
339    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
340    /// shader accesses to buffer contents. If this feature is not available,
341    /// this backend must have Naga inject bounds checks in the generated
342    /// SPIR-V.
343    ///
344    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
345    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
346    /// [ar]: ../struct.BufferBinding.html#accessible-region
347    robust_buffer_access2: bool,
348
349    robust_image_access2: bool,
350    zero_initialize_workgroup_memory: bool,
351    image_format_list: bool,
352    maximum_samplers: u32,
353
354    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
355    /// (promoted to Vulkan 1.3).
356    ///
357    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
358    ///
359    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
360    shader_integer_dot_product: bool,
361
362    /// True if this adapter supports 8-bit integers provided by the
363    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
364    ///
365    /// Allows shaders to declare the "Int8" capability. Note, however, that this
366    /// feature alone allows the use of 8-bit integers "only in the `Private`,
367    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
368    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
369    /// `StorageBuffer`), you also need to enable the corresponding feature in
370    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
371    /// capability (e.g., `StorageBuffer8BitAccess`).
372    ///
373    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
374    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
375    shader_int8: bool,
376
377    /// This is done to panic before undefined behavior, and is imperfect.
378    /// Basically, to allow implementations to emulate mv using instancing, if you
379    /// want to draw `n` instances to VR, you must draw `2n` instances, but you
380    /// can never draw more than `u32::MAX` instances. Therefore, when drawing
381    /// multiview on some vulkan implementations, it might restrict the instance
382    /// count, which isn't usually a thing in webgpu. We don't expose this limit
383    /// because its strange, i.e. only occurs on certain vulkan implementations
384    /// if you are drawing more than 128 million instances. We still want to avoid
385    /// undefined behavior in this situation, so we panic if the limit is violated.
386    multiview_instance_index_limit: u32,
387
388    /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
389    /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
390    /// these usages do not have as high of an alignment requirement using the buffer as
391    ///  a scratch buffer when building acceleration structures.
392    scratch_buffer_alignment: u32,
393}
394
395bitflags::bitflags!(
396    /// Workaround flags.
397    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
398    pub struct Workarounds: u32 {
399        /// Only generate SPIR-V for one entry point at a time.
400        const SEPARATE_ENTRY_POINTS = 0x1;
401        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
402        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
403        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
404        /// If the following code returns false, then nvidia will end up filling the wrong range.
405        ///
406        /// ```skip
407        /// fn nvidia_succeeds() -> bool {
408        ///   # let (copy_length, start_offset) = (0, 0);
409        ///     if copy_length >= 4096 {
410        ///         if start_offset % 16 != 0 {
411        ///             if copy_length == 4096 {
412        ///                 return true;
413        ///             }
414        ///             if copy_length % 16 == 0 {
415        ///                 return false;
416        ///             }
417        ///         }
418        ///     }
419        ///     true
420        /// }
421        /// ```
422        ///
423        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
424        /// if they cover a range of 4096 bytes or more.
425        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
426    }
427);
428
429#[derive(Clone, Debug, Eq, Hash, PartialEq)]
430struct AttachmentKey {
431    format: vk::Format,
432    layout: vk::ImageLayout,
433    ops: crate::AttachmentOps,
434}
435
436impl AttachmentKey {
437    /// Returns an attachment key for a compatible attachment.
438    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
439        Self {
440            format,
441            layout,
442            ops: crate::AttachmentOps::all(),
443        }
444    }
445}
446
447#[derive(Clone, Eq, Hash, PartialEq)]
448struct ColorAttachmentKey {
449    base: AttachmentKey,
450    resolve: Option<AttachmentKey>,
451}
452
453#[derive(Clone, Eq, Hash, PartialEq)]
454struct DepthStencilAttachmentKey {
455    base: AttachmentKey,
456    stencil_ops: crate::AttachmentOps,
457}
458
459#[derive(Clone, Eq, Default, Hash, PartialEq)]
460struct RenderPassKey {
461    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
462    depth_stencil: Option<DepthStencilAttachmentKey>,
463    sample_count: u32,
464    multiview_mask: Option<NonZeroU32>,
465}
466
467struct DeviceShared {
468    raw: ash::Device,
469    family_index: u32,
470    queue_index: u32,
471    raw_queue: vk::Queue,
472    instance: Arc<InstanceShared>,
473    physical_device: vk::PhysicalDevice,
474    enabled_extensions: Vec<&'static CStr>,
475    extension_fns: DeviceExtensionFunctions,
476    vendor_id: u32,
477    pipeline_cache_validation_key: [u8; 16],
478    timestamp_period: f32,
479    private_caps: PrivateCapabilities,
480    workarounds: Workarounds,
481    features: wgt::Features,
482    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
483    sampler_cache: Mutex<sampler::SamplerCache>,
484    memory_allocations_counter: InternalCounter,
485
486    /// Because we have cached framebuffers which are not deleted from until
487    /// the device is destroyed, if the implementation of vulkan re-uses handles
488    /// we need some way to differentiate between the old handle and the new handle.
489    /// This factory allows us to have a dedicated identity value for each texture.
490    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
491    /// As above, for texture views.
492    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
493
494    empty_descriptor_set_layout: vk::DescriptorSetLayout,
495
496    // The `drop_guard` field must be the last field of this struct so it is dropped last.
497    // Do not add new fields after it.
498    drop_guard: Option<crate::DropGuard>,
499}
500
501impl Drop for DeviceShared {
502    fn drop(&mut self) {
503        for &raw in self.render_passes.lock().values() {
504            unsafe { self.raw.destroy_render_pass(raw, None) };
505        }
506        unsafe {
507            self.raw
508                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
509        };
510        if self.drop_guard.is_none() {
511            unsafe { self.raw.destroy_device(None) };
512        }
513    }
514}
515
516pub struct Device {
517    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
518    desc_allocator: Mutex<descriptor::DescriptorAllocator>,
519    valid_ash_memory_types: u32,
520    naga_options: naga::back::spv::Options<'static>,
521    #[cfg(feature = "renderdoc")]
522    render_doc: crate::auxil::renderdoc::RenderDoc,
523    counters: Arc<wgt::HalCounters>,
524    // Struct members are dropped from first to last, put the Device last to ensure that
525    // all resources that depends on it are destroyed before it like the mem_allocator
526    shared: Arc<DeviceShared>,
527}
528
529impl Drop for Device {
530    fn drop(&mut self) {}
531}
532
533/// Semaphores for forcing queue submissions to run in order.
534///
535/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
536/// ordered, then the first submission will finish on the GPU before the second
537/// submission begins. To get this behavior on Vulkan we need to pass semaphores
538/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
539/// and to signal when their execution is done.
540///
541/// Normally this can be done with a single semaphore, waited on and then
542/// signalled for each submission. At any given time there's exactly one
543/// submission that would signal the semaphore, and exactly one waiting on it,
544/// as Vulkan requires.
545///
546/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
547/// hang if we use a single semaphore. The workaround is to alternate between
548/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
549/// the workaround until, say, Oct 2026.
550///
551/// [`wgpu_hal::Queue`]: crate::Queue
552/// [`submit`]: crate::Queue::submit
553/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
554/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
555#[derive(Clone)]
556struct RelaySemaphores {
557    /// The semaphore the next submission should wait on before beginning
558    /// execution on the GPU. This is `None` for the first submission, which
559    /// should not wait on anything at all.
560    wait: Option<vk::Semaphore>,
561
562    /// The semaphore the next submission should signal when it has finished
563    /// execution on the GPU.
564    signal: vk::Semaphore,
565}
566
567impl RelaySemaphores {
568    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
569        Ok(Self {
570            wait: None,
571            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
572        })
573    }
574
575    /// Advances the semaphores, returning the semaphores that should be used for a submission.
576    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
577        let old = self.clone();
578
579        // Build the state for the next submission.
580        match self.wait {
581            None => {
582                // The `old` values describe the first submission to this queue.
583                // The second submission should wait on `old.signal`, and then
584                // signal a new semaphore which we'll create now.
585                self.wait = Some(old.signal);
586                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
587            }
588            Some(ref mut wait) => {
589                // What this submission signals, the next should wait.
590                mem::swap(wait, &mut self.signal);
591            }
592        };
593
594        Ok(old)
595    }
596
597    /// Destroys the semaphores.
598    unsafe fn destroy(&self, device: &ash::Device) {
599        unsafe {
600            if let Some(wait) = self.wait {
601                device.destroy_semaphore(wait, None);
602            }
603            device.destroy_semaphore(self.signal, None);
604        }
605    }
606}
607
608pub struct Queue {
609    raw: vk::Queue,
610    device: Arc<DeviceShared>,
611    family_index: u32,
612    relay_semaphores: Mutex<RelaySemaphores>,
613    signal_semaphores: Mutex<SemaphoreList>,
614    wait_semaphores: Mutex<SemaphoreList>,
615}
616
617impl Queue {
618    pub fn as_raw(&self) -> vk::Queue {
619        self.raw
620    }
621}
622
623impl Drop for Queue {
624    fn drop(&mut self) {
625        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
626    }
627}
628#[derive(Debug)]
629enum BufferMemoryBacking {
630    Managed(gpu_allocator::vulkan::Allocation),
631    VulkanMemory {
632        memory: vk::DeviceMemory,
633        offset: u64,
634        size: u64,
635    },
636}
637impl BufferMemoryBacking {
638    fn memory(&self) -> vk::DeviceMemory {
639        match self {
640            Self::Managed(m) => unsafe { m.memory() },
641            Self::VulkanMemory { memory, .. } => *memory,
642        }
643    }
644    fn offset(&self) -> u64 {
645        match self {
646            Self::Managed(m) => m.offset(),
647            Self::VulkanMemory { offset, .. } => *offset,
648        }
649    }
650    fn size(&self) -> u64 {
651        match self {
652            Self::Managed(m) => m.size(),
653            Self::VulkanMemory { size, .. } => *size,
654        }
655    }
656}
657/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
658/// and therefore what cleanup is required when the buffer is destroyed.
659#[derive(Debug)]
660enum BufferOwnership {
661    /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
662    /// handle is destroyed and the memory is released.
663    Managed(Mutex<BufferMemoryBacking>),
664    /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
665    /// by the caller. On cleanup only the buffer handle is destroyed.
666    RawHandle,
667    /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
668    /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
669    /// neither the handle nor the memory.
670    External(crate::DropGuard),
671}
672
673#[derive(Debug)]
674pub struct Buffer {
675    raw: vk::Buffer,
676
677    // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
678    ownership: BufferOwnership,
679}
680impl Buffer {
681    /// # Safety
682    ///
683    /// - `vk_buffer`'s memory must be managed by the caller
684    /// - Externally imported buffers can't be mapped by `wgpu`
685    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
686        Self {
687            raw: vk_buffer,
688            ownership: BufferOwnership::RawHandle,
689        }
690    }
691
692    /// # Safety
693    /// - `vk_buffer` must outlive the returned `Buffer`.
694    /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
695    ///   The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
696    /// - Externally imported buffers can't be mapped by `wgpu`.
697    pub unsafe fn from_raw_externally_owned(
698        vk_buffer: vk::Buffer,
699        drop_callback: crate::DropCallback,
700    ) -> Self {
701        Self {
702            raw: vk_buffer,
703            ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
704        }
705    }
706
707    /// # Safety
708    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
709    /// - Externally imported buffers can't be mapped by `wgpu`
710    /// - `offset` and `size` must be valid with the allocation of `memory`
711    pub unsafe fn from_raw_managed(
712        vk_buffer: vk::Buffer,
713        memory: vk::DeviceMemory,
714        offset: u64,
715        size: u64,
716    ) -> Self {
717        Self {
718            raw: vk_buffer,
719            ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
720                memory,
721                offset,
722                size,
723            })),
724        }
725    }
726
727    /// # Safety
728    /// - The buffer handle must not be manually destroyed
729    pub unsafe fn raw_handle(&self) -> vk::Buffer {
730        self.raw
731    }
732}
733
734impl crate::DynBuffer for Buffer {}
735
736#[derive(Debug)]
737pub struct AccelerationStructure {
738    raw: vk::AccelerationStructureKHR,
739    buffer: vk::Buffer,
740    allocation: gpu_allocator::vulkan::Allocation,
741    compacted_size_query: Option<vk::QueryPool>,
742}
743
744impl crate::DynAccelerationStructure for AccelerationStructure {}
745
746#[derive(Debug)]
747pub enum TextureMemory {
748    // shared memory in GPU allocator (owned by wgpu-hal)
749    Allocation(gpu_allocator::vulkan::Allocation),
750
751    // dedicated memory (owned by wgpu-hal)
752    Dedicated(vk::DeviceMemory),
753
754    // memory not owned by wgpu
755    External,
756}
757
758#[derive(Debug)]
759pub struct Texture {
760    raw: vk::Image,
761    memory: TextureMemory,
762    format: wgt::TextureFormat,
763    copy_size: crate::CopyExtent,
764    identity: ResourceIdentity<vk::Image>,
765
766    // The `drop_guard` field must be the last field of this struct so it is dropped last.
767    // Do not add new fields after it.
768    drop_guard: Option<crate::DropGuard>,
769}
770
771impl crate::DynTexture for Texture {}
772
773impl Texture {
774    /// # Safety
775    ///
776    /// - The image handle must not be manually destroyed
777    pub unsafe fn raw_handle(&self) -> vk::Image {
778        self.raw
779    }
780
781    /// # Safety
782    ///
783    /// - The caller must not free the `vk::DeviceMemory` or
784    ///   `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
785    pub unsafe fn memory(&self) -> &TextureMemory {
786        &self.memory
787    }
788}
789
790#[derive(Debug)]
791pub struct TextureView {
792    raw_texture: vk::Image,
793    raw: vk::ImageView,
794    _layers: NonZeroU32,
795    format: wgt::TextureFormat,
796    raw_format: vk::Format,
797    base_mip_level: u32,
798    dimension: wgt::TextureViewDimension,
799    texture_identity: ResourceIdentity<vk::Image>,
800    view_identity: ResourceIdentity<vk::ImageView>,
801}
802
803impl crate::DynTextureView for TextureView {}
804
805impl TextureView {
806    /// # Safety
807    ///
808    /// - The image view handle must not be manually destroyed
809    pub unsafe fn raw_handle(&self) -> vk::ImageView {
810        self.raw
811    }
812
813    /// Returns the raw texture view, along with its identity.
814    fn identified_raw_view(&self) -> IdentifiedTextureView {
815        IdentifiedTextureView {
816            raw: self.raw,
817            identity: self.view_identity,
818        }
819    }
820}
821
822#[derive(Debug)]
823pub struct Sampler {
824    raw: vk::Sampler,
825    create_info: vk::SamplerCreateInfo<'static>,
826}
827
828impl crate::DynSampler for Sampler {}
829
/// Describes one binding inside a particular BindGroupLayout / BindGroup.
///
/// A [`naga::back::spv::BindingInfo`] is constructed from this; the
/// descriptor set value comes from the index of the group.
#[derive(Clone, Copy, Debug)]
struct BindingInfo {
    binding: u32,
    /// Size of the binding array, if any.
    binding_array_size: Option<NonZeroU32>,
}
838
839#[derive(Debug)]
840pub struct BindGroupLayout {
841    raw: vk::DescriptorSetLayout,
842    desc_count: descriptor::DescriptorCounts,
843    /// Sorted list of entries.
844    entries: Box<[wgt::BindGroupLayoutEntry]>,
845    /// Map of original binding index to remapped binding index and optional
846    /// array size.
847    binding_map: Vec<(u32, BindingInfo)>,
848    contains_binding_arrays: bool,
849}
850
851impl crate::DynBindGroupLayout for BindGroupLayout {}
852
/// A Vulkan pipeline layout together with a Naga SPIR-V binding map.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw Vulkan pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map in the form used by Naga's SPIR-V backend.
    binding_map: naga::back::spv::BindingMap,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynPipelineLayout for PipelineLayout {}
860
/// A bind group, represented by a single descriptor set.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set backing this bind group.
    set: descriptor::DescriptorSet,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynBindGroup for BindGroup {}
867
/// Miscellaneous allocation recycling pool, held by `CommandEncoder` in its
/// `temp` field.
///
/// The vectors are emptied by `clear` rather than dropped, so their
/// allocations can be reused.
#[derive(Default)]
struct Temp {
    /// Byte buffer that `make_c_str` fills with a NUL-terminated copy of a name.
    marker: Vec<u8>,
    /// Scratch storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Scratch storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
875
876impl Temp {
877    fn clear(&mut self) {
878        self.marker.clear();
879        self.buffer_barriers.clear();
880        self.image_barriers.clear();
881    }
882
883    fn make_c_str(&mut self, name: &str) -> &CStr {
884        self.marker.clear();
885        self.marker.extend_from_slice(name.as_bytes());
886        self.marker.push(0);
887        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
888    }
889}
890
/// Hands out identities for resources of type `T`.
///
/// Vulkan handles are not permanently unique, so a handle value alone cannot
/// serve as a stable key. This factory issues a fresh ID for every resource
/// instead.
struct ResourceIdentityFactory<T> {
    /// Next ID to hand out; lock-based variant for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// Next ID to hand out.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is `0`.
    fn new() -> Self {
        #[cfg(not(target_has_atomic = "64"))]
        let next_id = Mutex::new(0);
        #[cfg(target_has_atomic = "64")]
        let next_id = core::sync::atomic::AtomicU64::new(0);

        Self {
            next_id,
            _phantom: PhantomData,
        }
    }

    /// Issues the next ID for a resource of type `T`; each call yields the
    /// previous value plus one.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };
        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// Identity of one resource of type `T`.
///
/// Serves as a hashable key that stays unique for the lifetime of the
/// program, unlike the raw handle it accompanies.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
946
947#[derive(Clone, Eq, Hash, PartialEq)]
948struct FramebufferKey {
949    raw_pass: vk::RenderPass,
950    /// Because this is used as a key in a hash map, we need to include the identity
951    /// so that this hashes differently, even if the ImageView handles are the same
952    /// between different views.
953    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
954    /// While this is redundant for calculating the hash, we need access to an array
955    /// of all the raw ImageViews when we are creating the actual framebuffer,
956    /// so we store this here.
957    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
958    extent: wgt::Extent3d,
959}
960
961impl FramebufferKey {
962    fn push_view(&mut self, view: IdentifiedTextureView) {
963        self.attachment_identities.push(view.identity);
964        self.attachment_views.push(view.raw);
965    }
966}
967
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Identity accompanying `raw`; used for keying instead of the handle
    /// value (see `FramebufferKey::push_view`).
    identity: ResourceIdentity<vk::ImageView>,
}
974
/// Hash-map key for `CommandEncoder::temp_texture_views`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// Raw handle of the image.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Vulkan format that is part of the key.
    format: vk::Format,
    /// Mip level that is part of the key.
    mip_level: u32,
    /// Depth slice that is part of the key.
    depth_slice: u32,
}
986
/// Command encoder state for the Vulkan backend: a command pool, the command
/// buffers tracked for it, and per-encoder caches.
pub struct CommandEncoder {
    /// The Vulkan command pool; destroyed when the encoder is dropped.
    raw: vk::CommandPool,
    /// Shared device state used to issue Vulkan calls.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers stored by key; every entry is destroyed when the encoder
    /// is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Image views stored by key; every entry is destroyed when the encoder
    /// is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// HAL counters; `command_encoders` is decremented when the encoder is
    /// dropped.
    counters: Arc<wgt::HalCounters>,

    /// Presumably whether the currently bound pipeline is a multiview one
    /// (NOTE(review): inferred from the name; confirm where it is written).
    current_pipeline_is_multiview: bool,
}
1030
1031impl Drop for CommandEncoder {
1032    fn drop(&mut self) {
1033        // SAFETY:
1034        //
1035        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1036        // `CommandBuffer` must live until its execution is complete, and that a
1037        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1038        // Thus, we know that none of our `CommandBuffers` are in the "pending"
1039        // state.
1040        //
1041        // The other VUIDs are pretty obvious.
1042        unsafe {
1043            // `vkDestroyCommandPool` also frees any command buffers allocated
1044            // from that pool, so there's no need to explicitly call
1045            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1046            // fields.
1047            self.device.raw.destroy_command_pool(self.raw, None);
1048        }
1049
1050        for (_, fb) in self.framebuffers.drain() {
1051            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1052        }
1053
1054        for (_, view) in self.temp_texture_views.drain() {
1055            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1056        }
1057
1058        self.counters.command_encoders.sub(1);
1059    }
1060}
1061
impl CommandEncoder {
    /// Returns the handle stored in `active`, i.e. the current command buffer.
    ///
    /// Per the documentation of the `active` field, the handle is `null`
    /// unless the encoder is in the "recording" state.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1070
1071impl fmt::Debug for CommandEncoder {
1072    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073        f.debug_struct("CommandEncoder")
1074            .field("raw", &self.raw)
1075            .finish()
1076    }
1077}
1078
/// A recorded command buffer, represented by its Vulkan handle.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw Vulkan command buffer handle; `Queue::submit` passes it to
    /// `queue_submit`.
    raw: vk::CommandBuffer,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynCommandBuffer for CommandBuffer {}
1085
/// A shader module, held either as a Vulkan handle or as a Naga shader.
#[derive(Debug)]
pub enum ShaderModule {
    /// A raw Vulkan shader module handle.
    Raw(vk::ShaderModule),
    /// A Naga shader kept together with its runtime-check settings.
    /// NOTE(review): presumably translated to SPIR-V later (e.g. at pipeline
    /// creation); confirm in the device code.
    Intermediate {
        /// The Naga shader.
        naga_shader: crate::NagaShader,
        /// Runtime checks configured for this shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynShaderModule for ShaderModule {}
1096
/// A render pipeline, represented by its Vulkan handle plus a multiview flag.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
    /// Presumably whether the pipeline was created for multiview rendering
    /// (NOTE(review): inferred from the name; confirm at the creation site).
    is_multiview: bool,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynRenderPipeline for RenderPipeline {}
1104
/// A compute pipeline, represented by its Vulkan handle.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynComputePipeline for ComputePipeline {}
1111
/// A pipeline cache, represented by its Vulkan handle.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynPipelineCache for PipelineCache {}
1118
/// A query set, represented by a Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynQuerySet for QuerySet {}
1125
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// One should hold a read lock on the fence pool for as long as there are
    /// any references to the fences inside it. This ensures there are no race
    /// conditions when resetting the fences.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool(RwLock<FencePool>),
}
1173
/// A shared fence type. The `Arc` is expected to have a ref-count of one once
/// the function using it has returned.
///
/// Access to a fence should be synchronised, as fence resetting might happen
/// at any point. Resetting checks the ref-count of the fence, so instead of
/// copying the fence handle, clone its `Arc` container: the extra reference
/// signals that the fence is in use and must not be reset.
pub(super) type SynchronizedFence = Arc<vk::Fence>;
1180
/// State behind [`Fence::FencePool`]: the highest value known to be complete,
/// the fences still being tracked, and fences available for reuse.
#[derive(Debug)]
pub struct FencePool {
    /// Highest fence value already known to be complete; advanced by
    /// `Fence::maintain`.
    last_completed: crate::FenceValue,
    /// The pending fence values have to be ascending.
    active: Vec<(crate::FenceValue, SynchronizedFence)>,
    // Don't need extra synchronisation around the fences here, if they are used they should be put into active.
    /// Fences available for reuse: `Fence::maintain` pushes retired fences
    /// here and resets them, and `Queue::submit` pops from this list before
    /// creating a new fence.
    free: Vec<vk::Fence>,
}

// Marker implementation; the trait has no items to provide here.
impl crate::DynFence for Fence {}
1191
1192impl Fence {
1193    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1194    ///
1195    /// As an optimization, assume that we already know that the fence has
1196    /// reached `last_completed`, and don't bother checking fences whose values
1197    /// are less than that: those fences remain in the `active` array only
1198    /// because we haven't called `maintain` yet to clean them up.
1199    ///
1200    /// [`FenceValue`]: crate::FenceValue
1201    fn check_active(
1202        device: &ash::Device,
1203        mut last_completed: crate::FenceValue,
1204        active: &[(crate::FenceValue, SynchronizedFence)],
1205    ) -> Result<crate::FenceValue, crate::DeviceError> {
1206        for &(value, ref raw) in active.iter() {
1207            unsafe {
1208                if value > last_completed
1209                    && device
1210                        // Don't need to clone as active should be from a read or
1211                        // write lock which means this is already synchronised.
1212                        .get_fence_status(**raw)
1213                        .map_err(map_host_device_oom_and_lost_err)?
1214                {
1215                    last_completed = value;
1216                }
1217            }
1218        }
1219        Ok(last_completed)
1220    }
1221
1222    /// Return the highest signalled [`FenceValue`] for `self`.
1223    ///
1224    /// [`FenceValue`]: crate::FenceValue
1225    fn get_latest(
1226        &self,
1227        device: &ash::Device,
1228        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1229    ) -> Result<crate::FenceValue, crate::DeviceError> {
1230        match *self {
1231            Self::TimelineSemaphore(raw) => unsafe {
1232                Ok(match *extension.unwrap() {
1233                    ExtensionFn::Extension(ref ext) => ext
1234                        .get_semaphore_counter_value(raw)
1235                        .map_err(map_host_device_oom_and_lost_err)?,
1236                    ExtensionFn::Promoted => device
1237                        .get_semaphore_counter_value(raw)
1238                        .map_err(map_host_device_oom_and_lost_err)?,
1239                })
1240            },
1241            Self::FencePool(ref pool) => {
1242                let FencePool {
1243                    last_completed,
1244                    ref active,
1245                    free: _,
1246                } = *pool.read();
1247                Self::check_active(device, last_completed, active)
1248            }
1249        }
1250    }
1251
1252    /// Trim the internal state of this [`Fence`].
1253    ///
1254    /// This function has no externally visible effect, but you should call it
1255    /// periodically to keep this fence's resource consumption under control.
1256    ///
1257    /// For fences using the [`FencePool`] implementation, this function
1258    /// recycles fences that have been signaled. If you don't call this,
1259    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1260    /// time it's called.
1261    ///
1262    /// [`FencePool`]: Fence::FencePool
1263    /// [`Queue::submit`]: crate::Queue::submit
1264    fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1265        match *self {
1266            Self::TimelineSemaphore(_) => {}
1267            Self::FencePool(ref pool) => {
1268                let FencePool {
1269                    ref mut last_completed,
1270                    ref mut active,
1271                    ref mut free,
1272                } = *pool.write();
1273
1274                let base_free = free.len();
1275                let latest = Self::check_active(device, *last_completed, active)?;
1276
1277                active.retain_mut(|&mut (value, ref mut fence)| {
1278                    if value > latest {
1279                        true
1280                    } else if let Some(fence) = Arc::get_mut(fence) {
1281                        // No other references to these, so we have exclusive access. Add them to free and reset them later,
1282                        // but drop them from active immediately
1283                        free.push(*fence);
1284                        false
1285                    } else {
1286                        // some other function is using it. Although this shouldn't be to long,
1287                        // maintain shouldn't block, and it should be cleared up by the next time it happens
1288                        true
1289                    }
1290                });
1291
1292                if free.len() != base_free {
1293                    unsafe { device.reset_fences(&free[base_free..]) }
1294                        .map_err(map_device_oom_err)?
1295                }
1296                *last_completed = latest;
1297            }
1298        }
1299        Ok(())
1300    }
1301}
1302
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` with a single `queue_submit` call.
    ///
    /// Besides the command buffers, the submission carries:
    /// - per surface texture, an optional acquire wait semaphore and a submit
    ///   signal semaphore,
    /// - any semaphores previously staged on the queue in
    ///   `self.signal_semaphores` / `self.wait_semaphores`,
    /// - the relay semaphores that order this submission after the previous one,
    /// - the signal for `signal_fence` at `signal_value`: a timeline semaphore
    ///   signal, or a `VkFence` taken from (or created for) the fence pool.
    ///
    /// `signal_fence.maintain` is called first, so signalled pool fences are
    /// recycled before a new one is picked.
    ///
    /// # Errors
    ///
    /// Returns the mapped `DeviceError` if obtaining semaphores, maintaining
    /// the fence, creating a fence, or the submit itself fails.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless the fence-pool branch below selects a fence.
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Acquire every surface texture's semaphore guard up front, before
        // any of them is used.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Fold in the signal semaphores staged on the queue
        // (see `Queue::add_signal_semaphore`).
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // Fold in the wait semaphores staged on the queue
        // (see `Queue::add_wait_semaphore`).
        let mut wait_guard = self.wait_semaphores.lock();
        if !wait_guard.is_empty() {
            wait_semaphores.append(&mut wait_guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset,
        // and newer submits should not happen until this submit is done. Therefore, it should be too high
        // to be reset.
        let shared_fence;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool(ref pool) => {
                let FencePool {
                    ref mut active,
                    ref mut free,
                    ..
                } = *pool.write();
                // Reuse a recycled fence when one is available, otherwise
                // create a new one.
                shared_fence = match free.pop() {
                    Some(raw) => Arc::new(raw),
                    None => unsafe {
                        let fence = self
                            .device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?;
                        Arc::new(fence)
                    },
                };
                fence_raw = *shared_fence;
                // `active` holds a clone; `shared_fence` itself stays alive
                // until this function returns.
                active.push((signal_value, shared_fence.clone()));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        // Uninitialised storage handed to `add_to_submit`.
        // NOTE(review): presumably filled with timeline semaphore submit info
        // when timeline semaphores are involved; confirm in `SemaphoreList`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` through the swapchain of `surface`.
    ///
    /// The swapchain is write-locked for the duration of the call.
    ///
    /// # Panics
    ///
    /// Panics if `surface` currently has no swapchain.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    /// Returns the timestamp period stored on the shared device state.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    /// Blocks until this queue is idle (`queue_wait_idle`), mapping Vulkan
    /// errors with `map_host_device_oom_and_lost_err`.
    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}
1451
1452impl Queue {
1453    pub fn raw_device(&self) -> &ash::Device {
1454        &self.device.raw
1455    }
1456
1457    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1458        let mut guard = self.signal_semaphores.lock();
1459        if let Some(value) = semaphore_value {
1460            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1461        } else {
1462            guard.push_signal(SemaphoreType::Binary(semaphore));
1463        }
1464    }
1465
1466    /// Remove `semaphore` from the pending signal list if it is still present.
1467    ///
1468    /// Returns `true` if the semaphore was found and removed. If the submit
1469    /// already consumed it, this is a harmless no-op that returns `false`.
1470    pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1471        self.signal_semaphores.lock().remove(semaphore)
1472    }
1473
1474    /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
1475    ///
1476    /// `semaphore_value` selects the kind of payload the wait targets:
1477    ///
1478    /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
1479    /// - `None` - wait on a binary semaphore signal.
1480    ///
1481    /// `stage` is the pipeline stage at which the wait blocks downstream
1482    /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
1483    /// entire submission, or a more specific stage when only that stage
1484    /// reads the synchronised resource).
1485    pub fn add_wait_semaphore(
1486        &self,
1487        semaphore: vk::Semaphore,
1488        semaphore_value: Option<u64>,
1489        stage: vk::PipelineStageFlags,
1490    ) {
1491        let mut guard = self.wait_semaphores.lock();
1492        if let Some(value) = semaphore_value {
1493            guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
1494        } else {
1495            guard.push_wait(SemaphoreType::Binary(semaphore), stage);
1496        }
1497    }
1498
1499    /// Remove `semaphore` from the pending wait list if it is still present.
1500    ///
1501    /// Returns `true` if the semaphore was found and removed. If the submit
1502    /// already consumed it, this is a no-op that returns `false`.
1503    pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1504        self.wait_semaphores.lock().remove(semaphore)
1505    }
1506}
1507
1508/// Maps
1509///
1510/// - VK_ERROR_OUT_OF_HOST_MEMORY
1511/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1512fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1513    match err {
1514        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1515            get_oom_err(err)
1516        }
1517        e => get_unexpected_err(e),
1518    }
1519}
1520
1521/// Maps
1522///
1523/// - VK_ERROR_OUT_OF_HOST_MEMORY
1524/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1525/// - VK_ERROR_DEVICE_LOST
1526fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1527    match err {
1528        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1529        other => map_host_device_oom_err(other),
1530    }
1531}
1532
1533/// Maps
1534///
1535/// - VK_ERROR_OUT_OF_HOST_MEMORY
1536/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1537/// - VK_ERROR_FRAGMENTATION
1538fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
1539    match err {
1540        vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
1541        other => map_host_device_oom_err(other),
1542    }
1543}
1544
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling: the error is
/// passed straight to `map_host_device_oom_err`, so that code is classified
/// as unexpected.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1555
1556/// Maps
1557///
1558/// - VK_ERROR_OUT_OF_HOST_MEMORY
1559fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1560    match err {
1561        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1562        e => get_unexpected_err(e),
1563    }
1564}
1565
1566/// Maps
1567///
1568/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1569fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1570    match err {
1571        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1572        e => get_unexpected_err(e),
1573    }
1574}
1575
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The opaque-capture-address code gets no dedicated handling: the error is
/// passed straight to `map_host_oom_err`, so that code is classified as
/// unexpected.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1585
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// Only the two out-of-memory codes are recognised. The error is passed
/// straight to `map_host_device_oom_err`, so the last two codes are
/// classified as unexpected.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1599
1600/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1601/// feature flag is enabled.
1602fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1603    #[cfg(feature = "internal_error_panic")]
1604    panic!("Unexpected Vulkan error: {_err:?}");
1605
1606    #[allow(unreachable_code)]
1607    crate::DeviceError::Unexpected
1608}
1609
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The Vulkan error code is accepted but not inspected: every input yields
/// the same value.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1614
1615/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1616/// feature flag is enabled.
1617fn get_lost_err() -> crate::DeviceError {
1618    #[cfg(feature = "device_lost_panic")]
1619    panic!("Device lost");
1620
1621    #[allow(unreachable_code)]
1622    crate::DeviceError::Lost
1623}
1624
/// Plain-old-data record for one top-level acceleration structure instance.
///
/// The layout is fixed by `#[repr(C)]`, so field order and sizes matter.
/// NOTE(review): presumably mirrors `VkAccelerationStructureInstanceKHR`;
/// confirm against the code that writes these records.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Twelve floats of transform data (presumably a row-major 3x4 matrix;
    /// confirm).
    transform: [f32; 12],
    /// Custom data and mask packed into one 32-bit word.
    custom_data_and_mask: u32,
    /// Shader binding table record offset and flags packed into one 32-bit
    /// word.
    shader_binding_table_record_offset_and_flags: u32,
    /// 64-bit reference to the acceleration structure used by this instance.
    acceleration_structure_reference: u64,
}
1633
/// Arguments to the [`CreateDeviceCallback`].
///
/// Every public field is a mutable borrow (lifetime `'arg`), so changes made
/// by the callback are visible to the caller that invoked it.
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1655
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1665
/// Arguments to the [`CreateInstanceCallback`].
///
/// `extensions` and `create_info` are mutable borrows (lifetime `'arg`), so
/// changes made by the callback are visible to the caller that invoked it.
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1685
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;