// wgpu_hal/vulkan/mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
7We cope with this requirement by the combination of the following ways:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but they are removed when
16any of the image views (they have) gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35mod swapchain;
36
37pub use adapter::PhysicalDeviceFeatures;
38
39use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
40use core::{
41    borrow::Borrow,
42    ffi::CStr,
43    fmt,
44    marker::PhantomData,
45    mem::{self, ManuallyDrop},
46    num::NonZeroU32,
47};
48
49use arrayvec::ArrayVec;
50use ash::{ext, khr, vk};
51use bytemuck::{Pod, Zeroable};
52use hashbrown::HashSet;
53use parking_lot::{Mutex, RwLock};
54
55use naga::FastHashMap;
56use wgt::InternalCounter;
57
58use semaphore_list::SemaphoreList;
59
60use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
61
/// Maximum number of attachments a framebuffer key can hold:
/// one color plus one resolve attachment per color slot, plus depth/stencil.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
63
/// Marker type implementing [`crate::Api`] for the Vulkan backend.
#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    // Instance / adapter / device object graph.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Submission types.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resource types.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding and pipeline types.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
96
// Implement the `Dyn*` dynamic-dispatch resource traits for every concrete
// type in this backend.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
121
/// State for the `VK_EXT_debug_utils` messenger owned by an instance.
struct DebugUtils {
    /// Loaded instance-level function table for `VK_EXT_debug_utils`.
    extension: ext::debug_utils::Instance,
    /// The messenger registered with the instance.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Parameters for creating a debug-utils messenger.
pub struct DebugUtilsCreateInfo {
    /// Which message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Which message categories the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback via `pUserData`.
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
165
/// Instance state shared between the [`Instance`] and its children.
pub struct InstanceShared {
    raw: ash::Instance,
    /// Instance-level extensions enabled at creation.
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if `VK_EXT_debug_utils` is active.
    debug_utils: Option<DebugUtils>,
    /// Loaded `VK_KHR_get_physical_device_properties2` functions, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    /// Whether the NVIDIA Optimus layer/driver was detected.
    /// NOTE(review): inferred from the name — confirm at the detection site.
    has_nv_optimus: bool,
    /// Android SDK level (presumably 0 off-Android) — confirm at the initialization site.
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

/// The Vulkan instance; all state lives in the shared inner value.
pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// A presentable surface. The backing implementation is boxed because
/// non-native (e.g. DXGI) surfaces also exist (see the methods below).
pub struct Surface {
    inner: ManuallyDrop<Box<dyn swapchain::Surface>>,
    /// The currently configured swapchain, if any.
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
}
197
198impl Surface {
199    /// Returns the raw Vulkan surface handle.
200    ///
201    /// Returns `None` if the surface is a DXGI surface.
202    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
203        Some(
204            self.inner
205                .as_any()
206                .downcast_ref::<swapchain::NativeSurface>()?
207                .as_raw(),
208        )
209    }
210
211    /// Get the raw Vulkan swapchain associated with this surface.
212    ///
213    /// Returns [`None`] if the surface is not configured or if the swapchain
214    /// is a DXGI swapchain.
215    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
216        let read = self.swapchain.read();
217        Some(
218            read.as_ref()?
219                .as_any()
220                .downcast_ref::<swapchain::NativeSwapchain>()?
221                .as_raw(),
222        )
223    }
224
225    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
226    /// using [VK_GOOGLE_display_timing].
227    ///
228    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
229    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
230    ///
231    /// This can also be used to add a "not before" timestamp to the presentation.
232    ///
233    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
234    ///
235    /// # Panics
236    ///
237    /// - If the surface hasn't been configured.
238    /// - If the surface has been configured for a DXGI swapchain.
239    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
240    ///
241    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
242    #[track_caller]
243    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
244        let mut swapchain = self.swapchain.write();
245        swapchain
246            .as_mut()
247            .expect("Surface should have been configured")
248            .as_any_mut()
249            .downcast_mut::<swapchain::NativeSwapchain>()
250            .expect("Surface should have a native Vulkan swapchain")
251            .set_next_present_time(present_timing);
252    }
253}
254
/// A texture acquired from a [`Surface`] for presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    // Presumably the swapchain image index — confirm at the acquire site.
    index: u32,
    /// The wrapped texture.
    texture: Texture,
    /// Backend-specific presentation bookkeeping for this image.
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used wherever a `&Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, for the dynamic-dispatch texture trait.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
275
/// A physical device plus the capabilities and workarounds detected for it.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    /// Memory property flags this backend knows how to handle.
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

/// Device-level extension function tables, loaded only when the
/// corresponding extension was enabled.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

/// Function tables required for ray tracing support.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
308
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    // Presumably whether view usage can be narrowed via
    // `VkImageViewUsageCreateInfo` — confirm where this is set.
    image_view_usage: bool,
    /// Whether timeline semaphores are available; see the module docs on fences.
    timeline_semaphores: bool,
    /// Support for the `D24` depth format.
    texture_d24: bool,
    /// Support for the combined `D24`/`S8` depth-stencil format.
    texture_d24_s8: bool,
    /// Support for the stencil-only `S8` format.
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    // Presumably `nonCoherentAtomSize - 1`, used to align mapped ranges — TODO confirm.
    non_coherent_map_mask: wgt::BufferAddress,
    /// Whether `vkCmdDrawIndirect`-style multi-draw is supported.
    multi_draw_indirect: bool,
    /// Upper bound on the draw count for indirect multi-draws.
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    /// Whether workgroup memory can be requested as zero-initialized.
    zero_initialize_workgroup_memory: bool,
    /// Whether `VK_KHR_image_format_list` (or core equivalent) is available.
    image_format_list: bool,
    /// Maximum number of live samplers — presumably `maxSamplerAllocationCount`;
    /// confirm where this is populated.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// This is done to panic before undefined behavior, and is imperfect.
    /// Basically, to allow implementations to emulate mv using instancing, if you
    /// want to draw `n` instances to VR, you must draw `2n` instances, but you
    /// can never draw more than `u32::MAX` instances. Therefore, when drawing
    /// multiview on some vulkan implementations, it might restrict the instance
    /// count, which isn't usually a thing in webgpu. We don't expose this limit
    /// because its strange, i.e. only occurs on certain vulkan implementations
    /// if you are drawing more than 128 million instances. We still want to avoid
    /// undefined behavior in this situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
    /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
    /// these usages do not have as high of an alignment requirement using the buffer as
    ///  a scratch buffer when building acceleration structures.
    scratch_buffer_alignment: u32,
}
398
// Driver-specific workarounds; stored on the `Adapter` and `DeviceShared`.
bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
432
/// Key describing a single render-pass attachment for cache lookups.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    /// Load/store operations for this attachment.
    ops: crate::AttachmentOps,
}
439
440impl AttachmentKey {
441    /// Returns an attachment key for a compatible attachment.
442    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
443        Self {
444            format,
445            layout,
446            ops: crate::AttachmentOps::all(),
447        }
448    }
449}
450
/// Key for a color attachment and its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

/// Key for the depth/stencil attachment.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    /// Ops for the stencil aspect; `base.ops` covers the depth aspect.
    stencil_ops: crate::AttachmentOps,
}

/// Hash-map key identifying a cached `VkRenderPass` (see module docs).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    /// View mask for multiview rendering, if enabled.
    multiview_mask: Option<NonZeroU32>,
}
470
/// Device state shared between the [`Device`], the [`Queue`], and the
/// resources created from them.
struct DeviceShared {
    raw: ash::Device,
    /// Queue family index of `raw_queue`.
    family_index: u32,
    /// Index of the queue within its family.
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    // Presumably used to validate pipeline-cache blobs — confirm at its use site.
    pipeline_cache_validation_key: [u8; 16],
    // Presumably `limits.timestampPeriod` (ns per timestamp tick) — TODO confirm.
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render passes cached for the device's lifetime; destroyed in `drop`.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    empty_descriptor_set_layout: vk::DescriptorSetLayout,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}
504
505impl Drop for DeviceShared {
506    fn drop(&mut self) {
507        for &raw in self.render_passes.lock().values() {
508            unsafe { self.raw.destroy_render_pass(raw, None) };
509        }
510        unsafe {
511            self.raw
512                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
513        };
514        if self.drop_guard.is_none() {
515            unsafe { self.raw.destroy_device(None) };
516        }
517    }
518}
519
/// The open Vulkan device with its allocators.
pub struct Device {
    /// GPU memory allocator used for buffers and textures.
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    /// Descriptor pool/set allocator; cleaned up in `Drop`.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    // Presumably a bitmask of usable memory type indices — confirm at init.
    valid_ash_memory_types: u32,
    /// Options handed to naga's SPIR-V backend.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last, put the Device last to ensure that
    // all resources that depends on it are destroyed before it like the mem_allocator
    shared: Arc<DeviceShared>,
}
533
534impl Drop for Device {
535    fn drop(&mut self) {
536        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
537    }
538}
539
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// See [`RelaySemaphores::advance`] for the rotation logic.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
573
impl RelaySemaphores {
    /// Creates the initial state: no wait semaphore, and a fresh binary
    /// semaphore for the first submission to signal.
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                // After the first two submissions this alternates the pair.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    ///
    /// # Safety
    ///
    /// NOTE(review): the semaphores must belong to `device` and no longer be
    /// in use by pending submissions — confirm at the call sites.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}
614
/// A device queue plus the semaphore state used to order its submissions.
pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphores used to order submissions; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    // Presumably extra semaphores to signal on submission — confirm in `submit`.
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
628
629impl Drop for Queue {
630    fn drop(&mut self) {
631        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
632    }
633}
/// How a [`Buffer`]'s memory is backed.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// Allocation owned by our gpu-allocator instance.
    Managed(gpu_allocator::vulkan::Allocation),
    /// Caller-provided raw Vulkan memory; see [`Buffer::from_raw_managed`].
    VulkanMemory {
        memory: vk::DeviceMemory,
        /// Byte offset of the buffer within `memory`.
        offset: u64,
        /// Size in bytes of the buffer's range.
        size: u64,
    },
}
643impl BufferMemoryBacking {
644    fn memory(&self) -> vk::DeviceMemory {
645        match self {
646            Self::Managed(m) => unsafe { m.memory() },
647            Self::VulkanMemory { memory, .. } => *memory,
648        }
649    }
650    fn offset(&self) -> u64 {
651        match self {
652            Self::Managed(m) => m.offset(),
653            Self::VulkanMemory { offset, .. } => *offset,
654        }
655    }
656    fn size(&self) -> u64 {
657        match self {
658            Self::Managed(m) => m.size(),
659            Self::VulkanMemory { size, .. } => *size,
660        }
661    }
662}
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory; `None` when the buffer was imported via
    /// [`Buffer::from_raw`] and its memory is managed by the caller.
    allocation: Option<Mutex<BufferMemoryBacking>>,
}
668impl Buffer {
669    /// # Safety
670    ///
671    /// - `vk_buffer`'s memory must be managed by the caller
672    /// - Externally imported buffers can't be mapped by `wgpu`
673    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
674        Self {
675            raw: vk_buffer,
676            allocation: None,
677        }
678    }
679    /// # Safety
680    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
681    /// - Externally imported buffers can't be mapped by `wgpu`
682    /// - `offset` and `size` must be valid with the allocation of `memory`
683    pub unsafe fn from_raw_managed(
684        vk_buffer: vk::Buffer,
685        memory: vk::DeviceMemory,
686        offset: u64,
687        size: u64,
688    ) -> Self {
689        Self {
690            raw: vk_buffer,
691            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
692                memory,
693                offset,
694                size,
695            })),
696        }
697    }
698}
699
// Enable use of `Buffer` through wgpu-hal's dynamic-dispatch layer.
impl crate::DynBuffer for Buffer {}
701
/// A ray-tracing acceleration structure and its backing buffer.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    /// Buffer providing the acceleration structure's storage.
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    /// Query pool for reading back the compacted size, if requested.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

/// How a [`Texture`]'s memory is owned.
#[derive(Debug)]
pub enum TextureMemory {
    // shared memory in GPU allocator (owned by wgpu-hal)
    Allocation(gpu_allocator::vulkan::Allocation),

    // dedicated memory (owned by wgpu-hal)
    Dedicated(vk::DeviceMemory),

    // memory not owned by wgpu
    External,
}
723
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// How the image's memory is owned; see [`TextureMemory`].
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Program-unique identity, since `vk::Image` handle values may be
    /// reused by the driver (see `ResourceIdentityFactory`).
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}
738
impl Texture {
    /// Returns the underlying `vk::Image` handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// Returns a reference to this texture's memory backing.
    ///
    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}
755
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the parent texture; see `ResourceIdentityFactory`.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view, used in framebuffer cache keys.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}
770
771impl TextureView {
772    /// # Safety
773    ///
774    /// - The image view handle must not be manually destroyed
775    pub unsafe fn raw_handle(&self) -> vk::ImageView {
776        self.raw
777    }
778
779    /// Returns the raw texture view, along with its identity.
780    fn identified_raw_view(&self) -> IdentifiedTextureView {
781        IdentifiedTextureView {
782            raw: self.raw,
783            identity: self.view_identity,
784        }
785    }
786}
787
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    // Creation parameters — presumably the key into `DeviceShared::sampler_cache`;
    // confirm at the cache implementation.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The (possibly remapped) binding index.
    binding: u32,
    /// Size of the binding array, if this binding is arrayed.
    binding_array_size: Option<NonZeroU32>,
}
804
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Descriptor totals needed to allocate sets of this layout.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding remapping handed to naga's SPIR-V backend.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    /// The allocated descriptor set.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
833
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Scratch buffer for building NUL-terminated debug marker strings.
    marker: Vec<u8>,
    /// Recycled storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Recycled storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
841
842impl Temp {
843    fn clear(&mut self) {
844        self.marker.clear();
845        self.buffer_barriers.clear();
846        self.image_barriers.clear();
847    }
848
849    fn make_c_str(&mut self, name: &str) -> &CStr {
850        self.marker.clear();
851        self.marker.extend_from_slice(name.as_bytes());
852        self.marker.push(0);
853        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
854    }
855}
856
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    /// Fallback counter for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the generated IDs to the resource type `T` without storing a `T`.
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Create a factory whose first issued ID is 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        // Relaxed is sufficient: we only need each call to observe a distinct
        // value, not any ordering with other memory operations.
        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
902
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// Monotonically increasing value issued by [`ResourceIdentityFactory`].
    id: u64,
    /// Ties this identity to the resource type `T` without storing a `T`.
    _phantom: PhantomData<T>,
}
912
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass this framebuffer is created for.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// Dimensions of the framebuffer.
    extent: wgt::Extent3d,
}
926
927impl FramebufferKey {
928    fn push_view(&mut self, view: IdentifiedTextureView) {
929        self.attachment_identities.push(view.identity);
930        self.attachment_views.push(view.raw);
931    }
932}
933
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Program-lifetime-unique identity for `raw` (handles may be reused by the driver).
    identity: ResourceIdentity<vk::ImageView>,
}
940
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Format of the temporary view.
    format: vk::Format,
    /// The single mip level the view selects.
    mip_level: u32,
    /// The single depth slice / array layer the view selects.
    depth_slice: u32,
}
952
pub struct CommandEncoder {
    /// The command pool from which `active`, `free`, and `discarded` command
    /// buffers are allocated.
    raw: vk::CommandPool,
    /// The device that created `raw`.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by this encoder; destroyed when the encoder is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views cached by this encoder; destroyed when the encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared internal counters; `command_encoders` is decremented when this
    /// encoder is dropped.
    counters: Arc<wgt::HalCounters>,

    /// NOTE(review): presumably tracks whether the most recently bound render
    /// pipeline was created with multiview enabled (cf. `RenderPipeline::is_multiview`)
    /// — confirm against `command.rs`.
    current_pipeline_is_multiview: bool,
}
996
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // Destroy the framebuffers and temporary image views this encoder
        // cached (see the `framebuffers` / `temp_texture_views` fields).
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}
1027
impl CommandEncoder {
    /// Returns the raw command buffer currently being recorded.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1036
impl fmt::Debug for CommandEncoder {
    // Only the pool handle is shown; the remaining fields (caches, pools of
    // command buffers) are omitted from the debug output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}
1044
/// A raw Vulkan command buffer produced by a [`CommandEncoder`].
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

/// Marker impl allowing `CommandBuffer` to be used through the type-erased HAL API.
impl crate::DynCommandBuffer for CommandBuffer {}
1051
#[derive(Debug)]
pub enum ShaderModule {
    /// A shader module already available as a raw Vulkan handle.
    Raw(vk::ShaderModule),
    /// A shader kept as naga IR, together with the runtime-check settings to
    /// apply when it is translated. NOTE(review): presumably translated to
    /// SPIR-V at pipeline creation time — confirm in `device.rs`.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

/// Marker impl allowing `ShaderModule` to be used through the type-erased HAL API.
impl crate::DynShaderModule for ShaderModule {}
1062
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
    /// Whether this pipeline was created with multiview enabled
    /// (cf. `CommandEncoder::current_pipeline_is_multiview`).
    is_multiview: bool,
}

/// Marker impl allowing `RenderPipeline` to be used through the type-erased HAL API.
impl crate::DynRenderPipeline for RenderPipeline {}
1070
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

/// Marker impl allowing `ComputePipeline` to be used through the type-erased HAL API.
impl crate::DynComputePipeline for ComputePipeline {}
1077
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

/// Marker impl allowing `PipelineCache` to be used through the type-erased HAL API.
impl crate::DynPipelineCache for PipelineCache {}
1084
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

/// Marker impl allowing `QuerySet` to be used through the type-erased HAL API.
impl crate::DynQuerySet for QuerySet {}
1091
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

/// Marker impl allowing `Fence` to be used through the type-erased HAL API.
impl crate::DynFence for Fence {}
1142
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                // `get_fence_status` is a non-blocking poll
                // (`vkGetFenceStatus`); short-circuit on `value` so already
                // accounted-for fences aren't queried.
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                // Use the extension entry point when the feature comes from
                // `VK_KHR_timeline_semaphore`, or the core entry point when it
                // was promoted into the device's Vulkan version.
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                // Move every fence that has signalled (value <= latest) into
                // the free list so it can be recycled.
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                // If anything was recycled, drop those entries from `active`
                // and reset the newly freed fences (the tail of `free`) back
                // to the unsignaled state before reuse.
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1236
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Raw fence handed to `vkQueueSubmit`; remains null unless
        // `signal_fence` uses the `Fence::FencePool` implementation.
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Also signal any semaphores registered via `Queue::add_signal_semaphore`.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Recycle a free fence when available; otherwise create a new one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        // The swapchain must exist if we hold a texture acquired from it.
        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        // Value is cached on the device.
        self.device.timestamp_period
    }
}
1367
1368impl Queue {
1369    pub fn raw_device(&self) -> &ash::Device {
1370        &self.device.raw
1371    }
1372
1373    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1374        let mut guard = self.signal_semaphores.lock();
1375        if let Some(value) = semaphore_value {
1376            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1377        } else {
1378            guard.push_signal(SemaphoreType::Binary(semaphore));
1379        }
1380    }
1381}
1382
1383/// Maps
1384///
1385/// - VK_ERROR_OUT_OF_HOST_MEMORY
1386/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1387fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1388    match err {
1389        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1390            get_oom_err(err)
1391        }
1392        e => get_unexpected_err(e),
1393    }
1394}
1395
1396/// Maps
1397///
1398/// - VK_ERROR_OUT_OF_HOST_MEMORY
1399/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1400/// - VK_ERROR_DEVICE_LOST
1401fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1402    match err {
1403        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1404        other => map_host_device_oom_err(other),
1405    }
1406}
1407
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so the IOCA case can never occur; delegate to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1418
1419/// Maps
1420///
1421/// - VK_ERROR_OUT_OF_HOST_MEMORY
1422fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1423    match err {
1424        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1425        e => get_unexpected_err(e),
1426    }
1427}
1428
1429/// Maps
1430///
1431/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1432fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1433    match err {
1434        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1435        e => get_unexpected_err(e),
1436    }
1437}
1438
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so the IOCA case can never occur; delegate to the host-OOM mapping.
    map_host_oom_err(err)
}
1448
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    // so only the OOM results can occur; delegate to the OOM mapping.
    map_host_device_oom_err(err)
}
1462
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled the `panic!` above makes this unreachable,
    // hence the allow.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1472
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The original error is accepted (but ignored) so callers share a uniform shape
/// with [`get_unexpected_err`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1477
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled the `panic!` above makes this unreachable,
    // hence the allow.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1487
/// Plain-old-data instance record for top-level acceleration structures.
///
/// NOTE(review): field names and the `#[repr(C)]` layout suggest this mirrors
/// Vulkan's `VkAccelerationStructureInstanceKHR` — confirm against the spec
/// before changing the layout.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// 3x4 transform matrix.
    transform: [f32; 12],
    /// Packed instance custom data and visibility mask.
    custom_data_and_mask: u32,
    /// Packed SBT record offset and instance flags.
    shader_binding_table_record_offset_and_flags: u32,
    /// Device address (or handle) of the referenced BLAS.
    acceleration_structure_reference: u64,
}
1496
/// Arguments to the [`CreateDeviceCallback`], handed to the callback while the
/// Vulkan device is being created.
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}
1518
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1528
/// Arguments to the [`CreateInstanceCallback`], handed to the callback while the
/// Vulkan instance is being created.
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}
1548
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;