wgpu_hal/vulkan/
mod.rs

1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
7We cope with this requirement in a combination of the following ways:
8  - temporarily allocating `Vec` on heap, where overhead is permitted
9  - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but a framebuffer is
16removed when any of the image views it contains is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35mod swapchain;
36
37pub use adapter::PhysicalDeviceFeatures;
38
39use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
40use core::{
41    borrow::Borrow,
42    ffi::CStr,
43    fmt,
44    marker::PhantomData,
45    mem::{self, ManuallyDrop},
46    num::NonZeroU32,
47};
48
49use arrayvec::ArrayVec;
50use ash::{ext, khr, vk};
51use bytemuck::{Pod, Zeroable};
52use hashbrown::HashSet;
53use parking_lot::{Mutex, RwLock};
54
55use naga::FastHashMap;
56use wgt::InternalCounter;
57
58use semaphore_list::SemaphoreList;
59
60use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
61
62const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
63
64#[derive(Clone, Debug)]
65pub struct Api;
66
67impl crate::Api for Api {
68    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
69
70    type Instance = Instance;
71    type Surface = Surface;
72    type Adapter = Adapter;
73    type Device = Device;
74
75    type Queue = Queue;
76    type CommandEncoder = CommandEncoder;
77    type CommandBuffer = CommandBuffer;
78
79    type Buffer = Buffer;
80    type Texture = Texture;
81    type SurfaceTexture = SurfaceTexture;
82    type TextureView = TextureView;
83    type Sampler = Sampler;
84    type QuerySet = QuerySet;
85    type Fence = Fence;
86    type AccelerationStructure = AccelerationStructure;
87    type PipelineCache = PipelineCache;
88
89    type BindGroupLayout = BindGroupLayout;
90    type BindGroup = BindGroup;
91    type PipelineLayout = PipelineLayout;
92    type ShaderModule = ShaderModule;
93    type RenderPipeline = RenderPipeline;
94    type ComputePipeline = ComputePipeline;
95}
96
97crate::impl_dyn_resource!(
98    Adapter,
99    AccelerationStructure,
100    BindGroup,
101    BindGroupLayout,
102    Buffer,
103    CommandBuffer,
104    CommandEncoder,
105    ComputePipeline,
106    Device,
107    Fence,
108    Instance,
109    PipelineCache,
110    PipelineLayout,
111    QuerySet,
112    Queue,
113    RenderPipeline,
114    Sampler,
115    ShaderModule,
116    Surface,
117    SurfaceTexture,
118    Texture,
119    TextureView
120);
121
122struct DebugUtils {
123    extension: ext::debug_utils::Instance,
124    messenger: vk::DebugUtilsMessengerEXT,
125
126    /// Owning pointer to the debug messenger callback user data.
127    ///
128    /// `InstanceShared::drop` destroys the debug messenger before
129    /// dropping this, so the callback should never receive a dangling
130    /// user data pointer.
131    #[allow(dead_code)]
132    callback_data: Box<DebugUtilsMessengerUserData>,
133}
134
135pub struct DebugUtilsCreateInfo {
136    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
137    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
138    callback_data: Box<DebugUtilsMessengerUserData>,
139}
140
141#[derive(Debug)]
142/// The properties related to the validation layer needed for the
143/// DebugUtilsMessenger for their workarounds
144struct ValidationLayerProperties {
145    /// Validation layer description, from `vk::LayerProperties`.
146    layer_description: CString,
147
148    /// Validation layer specification version, from `vk::LayerProperties`.
149    layer_spec_version: u32,
150}
151
152/// User data needed by `instance::debug_utils_messenger_callback`.
153///
154/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
155/// pointer refers to one of these values.
156#[derive(Debug)]
157pub struct DebugUtilsMessengerUserData {
158    /// The properties related to the validation layer, if present
159    validation_layer_properties: Option<ValidationLayerProperties>,
160
161    /// If the OBS layer is present. OBS never increments the version of their layer,
162    /// so there's no reason to have the version.
163    has_obs_layer: bool,
164}
165
166pub struct InstanceShared {
167    raw: ash::Instance,
168    extensions: Vec<&'static CStr>,
169    flags: wgt::InstanceFlags,
170    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
171    debug_utils: Option<DebugUtils>,
172    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
173    entry: ash::Entry,
174    has_nv_optimus: bool,
175    android_sdk_version: u32,
176    /// The instance API version.
177    ///
178    /// Which is the version of Vulkan supported for instance-level functionality.
179    ///
180    /// It is associated with a `VkInstance` and its children,
181    /// except for a `VkPhysicalDevice` and its children.
182    instance_api_version: u32,
183
184    // The `drop_guard` field must be the last field of this struct so it is dropped last.
185    // Do not add new fields after it.
186    drop_guard: Option<crate::DropGuard>,
187}
188
189pub struct Instance {
190    shared: Arc<InstanceShared>,
191}
192
193pub struct Surface {
194    inner: ManuallyDrop<Box<dyn swapchain::Surface>>,
195    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
196}
197
198impl Surface {
199    /// Returns the raw Vulkan surface handle.
200    ///
201    /// Returns `None` if the surface is a DXGI surface.
202    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
203        Some(
204            self.inner
205                .as_any()
206                .downcast_ref::<swapchain::NativeSurface>()?
207                .as_raw(),
208        )
209    }
210
211    /// Get the raw Vulkan swapchain associated with this surface.
212    ///
213    /// Returns [`None`] if the surface is not configured or if the swapchain
214    /// is a DXGI swapchain.
215    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
216        let read = self.swapchain.read();
217        Some(
218            read.as_ref()?
219                .as_any()
220                .downcast_ref::<swapchain::NativeSwapchain>()?
221                .as_raw(),
222        )
223    }
224
225    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
226    /// using [VK_GOOGLE_display_timing].
227    ///
228    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
229    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
230    ///
231    /// This can also be used to add a "not before" timestamp to the presentation.
232    ///
233    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
234    ///
235    /// # Panics
236    ///
237    /// - If the surface hasn't been configured.
238    /// - If the surface has been configured for a DXGI swapchain.
239    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
240    ///
241    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
242    #[track_caller]
243    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
244        let mut swapchain = self.swapchain.write();
245        swapchain
246            .as_mut()
247            .expect("Surface should have been configured")
248            .as_any_mut()
249            .downcast_mut::<swapchain::NativeSwapchain>()
250            .expect("Surface should have a native Vulkan swapchain")
251            .set_next_present_time(present_timing);
252    }
253}
254
255#[derive(Debug)]
256pub struct SurfaceTexture {
257    index: u32,
258    texture: Texture,
259    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
260}
261
262impl crate::DynSurfaceTexture for SurfaceTexture {}
263
264impl Borrow<Texture> for SurfaceTexture {
265    fn borrow(&self) -> &Texture {
266        &self.texture
267    }
268}
269
270impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
271    fn borrow(&self) -> &dyn crate::DynTexture {
272        &self.texture
273    }
274}
275
276pub struct Adapter {
277    raw: vk::PhysicalDevice,
278    instance: Arc<InstanceShared>,
279    //queue_families: Vec<vk::QueueFamilyProperties>,
280    known_memory_flags: vk::MemoryPropertyFlags,
281    phd_capabilities: adapter::PhysicalDeviceProperties,
282    phd_features: PhysicalDeviceFeatures,
283    downlevel_flags: wgt::DownlevelFlags,
284    private_caps: PrivateCapabilities,
285    workarounds: Workarounds,
286}
287
288// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
289enum ExtensionFn<T> {
290    /// The loaded function pointer struct for an extension.
291    Extension(T),
292    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
293    Promoted,
294}
295
296struct DeviceExtensionFunctions {
297    debug_utils: Option<ext::debug_utils::Device>,
298    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
299    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
300    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
301    mesh_shading: Option<ext::mesh_shader::Device>,
302    #[cfg_attr(not(unix), allow(dead_code))]
303    external_memory_fd: Option<khr::external_memory_fd::Device>,
304}
305
306struct RayTracingDeviceExtensionFunctions {
307    acceleration_structure: khr::acceleration_structure::Device,
308    buffer_device_address: khr::buffer_device_address::Device,
309}
310
311/// Set of internal capabilities, which don't show up in the exposed
312/// device geometry, but affect the code paths taken internally.
313#[derive(Clone, Debug)]
314struct PrivateCapabilities {
315    image_view_usage: bool,
316    timeline_semaphores: bool,
317    texture_d24: bool,
318    texture_d24_s8: bool,
319    texture_s8: bool,
320    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
321    can_present: bool,
322    non_coherent_map_mask: wgt::BufferAddress,
323    multi_draw_indirect: bool,
324    max_draw_indirect_count: u32,
325
326    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
327    ///
328    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
329    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
330    /// a given bindgroup binding outside that binding's [accessible
331    /// region][ar]. Enabling `robustBufferAccess` does ensure that
332    /// out-of-bounds reads and writes are not undefined behavior (that's good),
333    /// but still permits out-of-bounds reads to return data from anywhere
334    /// within the buffer, not just the accessible region.
335    ///
336    /// [ar]: ../struct.BufferBinding.html#accessible-region
337    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
338    robust_buffer_access: bool,
339
340    robust_image_access: bool,
341
342    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
343    /// [`robustBufferAccess2`] feature.
344    ///
345    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
346    /// shader accesses to buffer contents. If this feature is not available,
347    /// this backend must have Naga inject bounds checks in the generated
348    /// SPIR-V.
349    ///
350    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
351    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
352    /// [ar]: ../struct.BufferBinding.html#accessible-region
353    robust_buffer_access2: bool,
354
355    robust_image_access2: bool,
356    zero_initialize_workgroup_memory: bool,
357    image_format_list: bool,
358    maximum_samplers: u32,
359
360    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
361    /// (promoted to Vulkan 1.3).
362    ///
363    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
364    ///
365    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
366    shader_integer_dot_product: bool,
367
368    /// True if this adapter supports 8-bit integers provided by the
369    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
370    ///
371    /// Allows shaders to declare the "Int8" capability. Note, however, that this
372    /// feature alone allows the use of 8-bit integers "only in the `Private`,
373    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
374    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
375    /// `StorageBuffer`), you also need to enable the corresponding feature in
376    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
377    /// capability (e.g., `StorageBuffer8BitAccess`).
378    ///
379    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
380    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
381    shader_int8: bool,
382
383    /// This is done to panic before undefined behavior, and is imperfect.
384    /// Basically, to allow implementations to emulate mv using instancing, if you
385    /// want to draw `n` instances to VR, you must draw `2n` instances, but you
386    /// can never draw more than `u32::MAX` instances. Therefore, when drawing
387    /// multiview on some vulkan implementations, it might restrict the instance
388    /// count, which isn't usually a thing in webgpu. We don't expose this limit
389    /// because its strange, i.e. only occurs on certain vulkan implementations
390    /// if you are drawing more than 128 million instances. We still want to avoid
391    /// undefined behavior in this situation, so we panic if the limit is violated.
392    multiview_instance_index_limit: u32,
393
394    /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
395    /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
396    /// these usages do not have as high of an alignment requirement using the buffer as
397    ///  a scratch buffer when building acceleration structures.
398    scratch_buffer_alignment: u32,
399}
400
401bitflags::bitflags!(
402    /// Workaround flags.
403    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
404    pub struct Workarounds: u32 {
405        /// Only generate SPIR-V for one entry point at a time.
406        const SEPARATE_ENTRY_POINTS = 0x1;
407        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
408        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
409        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
410        /// If the following code returns false, then nvidia will end up filling the wrong range.
411        ///
412        /// ```skip
413        /// fn nvidia_succeeds() -> bool {
414        ///   # let (copy_length, start_offset) = (0, 0);
415        ///     if copy_length >= 4096 {
416        ///         if start_offset % 16 != 0 {
417        ///             if copy_length == 4096 {
418        ///                 return true;
419        ///             }
420        ///             if copy_length % 16 == 0 {
421        ///                 return false;
422        ///             }
423        ///         }
424        ///     }
425        ///     true
426        /// }
427        /// ```
428        ///
429        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
430        /// if they cover a range of 4096 bytes or more.
431        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
432    }
433);
434
435#[derive(Clone, Debug, Eq, Hash, PartialEq)]
436struct AttachmentKey {
437    format: vk::Format,
438    layout: vk::ImageLayout,
439    ops: crate::AttachmentOps,
440}
441
442impl AttachmentKey {
443    /// Returns an attachment key for a compatible attachment.
444    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
445        Self {
446            format,
447            layout,
448            ops: crate::AttachmentOps::all(),
449        }
450    }
451}
452
453#[derive(Clone, Eq, Hash, PartialEq)]
454struct ColorAttachmentKey {
455    base: AttachmentKey,
456    resolve: Option<AttachmentKey>,
457}
458
459#[derive(Clone, Eq, Hash, PartialEq)]
460struct DepthStencilAttachmentKey {
461    base: AttachmentKey,
462    stencil_ops: crate::AttachmentOps,
463}
464
465#[derive(Clone, Eq, Default, Hash, PartialEq)]
466struct RenderPassKey {
467    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
468    depth_stencil: Option<DepthStencilAttachmentKey>,
469    sample_count: u32,
470    multiview_mask: Option<NonZeroU32>,
471}
472
473struct DeviceShared {
474    raw: ash::Device,
475    family_index: u32,
476    queue_index: u32,
477    raw_queue: vk::Queue,
478    instance: Arc<InstanceShared>,
479    physical_device: vk::PhysicalDevice,
480    enabled_extensions: Vec<&'static CStr>,
481    extension_fns: DeviceExtensionFunctions,
482    vendor_id: u32,
483    pipeline_cache_validation_key: [u8; 16],
484    timestamp_period: f32,
485    private_caps: PrivateCapabilities,
486    workarounds: Workarounds,
487    features: wgt::Features,
488    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
489    sampler_cache: Mutex<sampler::SamplerCache>,
490    memory_allocations_counter: InternalCounter,
491
492    /// Because we have cached framebuffers which are not deleted from until
493    /// the device is destroyed, if the implementation of vulkan re-uses handles
494    /// we need some way to differentiate between the old handle and the new handle.
495    /// This factory allows us to have a dedicated identity value for each texture.
496    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
497    /// As above, for texture views.
498    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
499
500    empty_descriptor_set_layout: vk::DescriptorSetLayout,
501
502    // The `drop_guard` field must be the last field of this struct so it is dropped last.
503    // Do not add new fields after it.
504    drop_guard: Option<crate::DropGuard>,
505}
506
507impl Drop for DeviceShared {
508    fn drop(&mut self) {
509        for &raw in self.render_passes.lock().values() {
510            unsafe { self.raw.destroy_render_pass(raw, None) };
511        }
512        unsafe {
513            self.raw
514                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
515        };
516        if self.drop_guard.is_none() {
517            unsafe { self.raw.destroy_device(None) };
518        }
519    }
520}
521
522pub struct Device {
523    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
524    desc_allocator:
525        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
526    valid_ash_memory_types: u32,
527    naga_options: naga::back::spv::Options<'static>,
528    #[cfg(feature = "renderdoc")]
529    render_doc: crate::auxil::renderdoc::RenderDoc,
530    counters: Arc<wgt::HalCounters>,
531    // Struct members are dropped from first to last, put the Device last to ensure that
532    // all resources that depends on it are destroyed before it like the mem_allocator
533    shared: Arc<DeviceShared>,
534}
535
536impl Drop for Device {
537    fn drop(&mut self) {
538        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
539    }
540}
541
542/// Semaphores for forcing queue submissions to run in order.
543///
544/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
545/// ordered, then the first submission will finish on the GPU before the second
546/// submission begins. To get this behavior on Vulkan we need to pass semaphores
547/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
548/// and to signal when their execution is done.
549///
550/// Normally this can be done with a single semaphore, waited on and then
551/// signalled for each submission. At any given time there's exactly one
552/// submission that would signal the semaphore, and exactly one waiting on it,
553/// as Vulkan requires.
554///
555/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
556/// hang if we use a single semaphore. The workaround is to alternate between
557/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
558/// the workaround until, say, Oct 2026.
559///
560/// [`wgpu_hal::Queue`]: crate::Queue
561/// [`submit`]: crate::Queue::submit
562/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
563/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
564#[derive(Clone)]
565struct RelaySemaphores {
566    /// The semaphore the next submission should wait on before beginning
567    /// execution on the GPU. This is `None` for the first submission, which
568    /// should not wait on anything at all.
569    wait: Option<vk::Semaphore>,
570
571    /// The semaphore the next submission should signal when it has finished
572    /// execution on the GPU.
573    signal: vk::Semaphore,
574}
575
576impl RelaySemaphores {
577    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
578        Ok(Self {
579            wait: None,
580            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
581        })
582    }
583
584    /// Advances the semaphores, returning the semaphores that should be used for a submission.
585    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
586        let old = self.clone();
587
588        // Build the state for the next submission.
589        match self.wait {
590            None => {
591                // The `old` values describe the first submission to this queue.
592                // The second submission should wait on `old.signal`, and then
593                // signal a new semaphore which we'll create now.
594                self.wait = Some(old.signal);
595                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
596            }
597            Some(ref mut wait) => {
598                // What this submission signals, the next should wait.
599                mem::swap(wait, &mut self.signal);
600            }
601        };
602
603        Ok(old)
604    }
605
606    /// Destroys the semaphores.
607    unsafe fn destroy(&self, device: &ash::Device) {
608        unsafe {
609            if let Some(wait) = self.wait {
610                device.destroy_semaphore(wait, None);
611            }
612            device.destroy_semaphore(self.signal, None);
613        }
614    }
615}
616
617pub struct Queue {
618    raw: vk::Queue,
619    device: Arc<DeviceShared>,
620    family_index: u32,
621    relay_semaphores: Mutex<RelaySemaphores>,
622    signal_semaphores: Mutex<SemaphoreList>,
623}
624
625impl Queue {
626    pub fn as_raw(&self) -> vk::Queue {
627        self.raw
628    }
629}
630
631impl Drop for Queue {
632    fn drop(&mut self) {
633        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
634    }
635}
636#[derive(Debug)]
637enum BufferMemoryBacking {
638    Managed(gpu_allocator::vulkan::Allocation),
639    VulkanMemory {
640        memory: vk::DeviceMemory,
641        offset: u64,
642        size: u64,
643    },
644}
645impl BufferMemoryBacking {
646    fn memory(&self) -> vk::DeviceMemory {
647        match self {
648            Self::Managed(m) => unsafe { m.memory() },
649            Self::VulkanMemory { memory, .. } => *memory,
650        }
651    }
652    fn offset(&self) -> u64 {
653        match self {
654            Self::Managed(m) => m.offset(),
655            Self::VulkanMemory { offset, .. } => *offset,
656        }
657    }
658    fn size(&self) -> u64 {
659        match self {
660            Self::Managed(m) => m.size(),
661            Self::VulkanMemory { size, .. } => *size,
662        }
663    }
664}
665#[derive(Debug)]
666pub struct Buffer {
667    raw: vk::Buffer,
668    allocation: Option<Mutex<BufferMemoryBacking>>,
669}
670impl Buffer {
671    /// # Safety
672    ///
673    /// - `vk_buffer`'s memory must be managed by the caller
674    /// - Externally imported buffers can't be mapped by `wgpu`
675    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
676        Self {
677            raw: vk_buffer,
678            allocation: None,
679        }
680    }
681    /// # Safety
682    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
683    /// - Externally imported buffers can't be mapped by `wgpu`
684    /// - `offset` and `size` must be valid with the allocation of `memory`
685    pub unsafe fn from_raw_managed(
686        vk_buffer: vk::Buffer,
687        memory: vk::DeviceMemory,
688        offset: u64,
689        size: u64,
690    ) -> Self {
691        Self {
692            raw: vk_buffer,
693            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
694                memory,
695                offset,
696                size,
697            })),
698        }
699    }
700}
701
702impl crate::DynBuffer for Buffer {}
703
704#[derive(Debug)]
705pub struct AccelerationStructure {
706    raw: vk::AccelerationStructureKHR,
707    buffer: vk::Buffer,
708    allocation: gpu_allocator::vulkan::Allocation,
709    compacted_size_query: Option<vk::QueryPool>,
710}
711
712impl crate::DynAccelerationStructure for AccelerationStructure {}
713
714#[derive(Debug)]
715pub enum TextureMemory {
716    // shared memory in GPU allocator (owned by wgpu-hal)
717    Allocation(gpu_allocator::vulkan::Allocation),
718
719    // dedicated memory (owned by wgpu-hal)
720    Dedicated(vk::DeviceMemory),
721
722    // memory not owned by wgpu
723    External,
724}
725
726#[derive(Debug)]
727pub struct Texture {
728    raw: vk::Image,
729    memory: TextureMemory,
730    format: wgt::TextureFormat,
731    copy_size: crate::CopyExtent,
732    identity: ResourceIdentity<vk::Image>,
733
734    // The `drop_guard` field must be the last field of this struct so it is dropped last.
735    // Do not add new fields after it.
736    drop_guard: Option<crate::DropGuard>,
737}
738
739impl crate::DynTexture for Texture {}
740
741impl Texture {
742    /// # Safety
743    ///
744    /// - The image handle must not be manually destroyed
745    pub unsafe fn raw_handle(&self) -> vk::Image {
746        self.raw
747    }
748
749    /// # Safety
750    ///
751    /// - The caller must not free the `vk::DeviceMemory` or
752    ///   `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
753    pub unsafe fn memory(&self) -> &TextureMemory {
754        &self.memory
755    }
756}
757
758#[derive(Debug)]
759pub struct TextureView {
760    raw_texture: vk::Image,
761    raw: vk::ImageView,
762    _layers: NonZeroU32,
763    format: wgt::TextureFormat,
764    raw_format: vk::Format,
765    base_mip_level: u32,
766    dimension: wgt::TextureViewDimension,
767    texture_identity: ResourceIdentity<vk::Image>,
768    view_identity: ResourceIdentity<vk::ImageView>,
769}
770
771impl crate::DynTextureView for TextureView {}
772
773impl TextureView {
774    /// # Safety
775    ///
776    /// - The image view handle must not be manually destroyed
777    pub unsafe fn raw_handle(&self) -> vk::ImageView {
778        self.raw
779    }
780
781    /// Returns the raw texture view, along with its identity.
782    fn identified_raw_view(&self) -> IdentifiedTextureView {
783        IdentifiedTextureView {
784            raw: self.raw,
785            identity: self.view_identity,
786        }
787    }
788}
789
790#[derive(Debug)]
791pub struct Sampler {
792    raw: vk::Sampler,
793    create_info: vk::SamplerCreateInfo<'static>,
794}
795
796impl crate::DynSampler for Sampler {}
797
798/// Information about a binding within a specific BindGroupLayout / BindGroup.
799/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
800/// the descriptor set value will be taken from the index of the group.
801#[derive(Copy, Clone, Debug)]
802struct BindingInfo {
803    binding: u32,
804    binding_array_size: Option<NonZeroU32>,
805}
806
807#[derive(Debug)]
808pub struct BindGroupLayout {
809    raw: vk::DescriptorSetLayout,
810    desc_count: gpu_descriptor::DescriptorTotalCount,
811    /// Sorted list of entries.
812    entries: Box<[wgt::BindGroupLayoutEntry]>,
813    /// Map of original binding index to remapped binding index and optional
814    /// array size.
815    binding_map: Vec<(u32, BindingInfo)>,
816    contains_binding_arrays: bool,
817}
818
819impl crate::DynBindGroupLayout for BindGroupLayout {}
820
821#[derive(Debug)]
822pub struct PipelineLayout {
823    raw: vk::PipelineLayout,
824    binding_map: naga::back::spv::BindingMap,
825}
826
827impl crate::DynPipelineLayout for PipelineLayout {}
828
829#[derive(Debug)]
830pub struct BindGroup {
831    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
832}
833
834impl crate::DynBindGroup for BindGroup {}
835
836/// Miscellaneous allocation recycling pool for `CommandAllocator`.
837#[derive(Default)]
838struct Temp {
839    marker: Vec<u8>,
840    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
841    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
842}
843
844impl Temp {
845    fn clear(&mut self) {
846        self.marker.clear();
847        self.buffer_barriers.clear();
848        self.image_barriers.clear();
849    }
850
851    fn make_c_str(&mut self, name: &str) -> &CStr {
852        self.marker.clear();
853        self.marker.extend_from_slice(name.as_bytes());
854        self.marker.push(0);
855        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
856    }
857}
858
859/// Generates unique IDs for each resource of type `T`.
860///
861/// Because vk handles are not permanently unique, this
862/// provides a way to generate unique IDs for each resource.
863struct ResourceIdentityFactory<T> {
864    #[cfg(not(target_has_atomic = "64"))]
865    next_id: Mutex<u64>,
866    #[cfg(target_has_atomic = "64")]
867    next_id: core::sync::atomic::AtomicU64,
868    _phantom: PhantomData<T>,
869}
870
871impl<T> ResourceIdentityFactory<T> {
872    fn new() -> Self {
873        Self {
874            #[cfg(not(target_has_atomic = "64"))]
875            next_id: Mutex::new(0),
876            #[cfg(target_has_atomic = "64")]
877            next_id: core::sync::atomic::AtomicU64::new(0),
878            _phantom: PhantomData,
879        }
880    }
881
882    /// Returns a new unique ID for a resource of type `T`.
883    fn next(&self) -> ResourceIdentity<T> {
884        #[cfg(not(target_has_atomic = "64"))]
885        {
886            let mut next_id = self.next_id.lock();
887            let id = *next_id;
888            *next_id += 1;
889            ResourceIdentity {
890                id,
891                _phantom: PhantomData,
892            }
893        }
894
895        #[cfg(target_has_atomic = "64")]
896        ResourceIdentity {
897            id: self
898                .next_id
899                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
900            _phantom: PhantomData,
901        }
902    }
903}
904
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// Monotonically increasing value handed out by a [`ResourceIdentityFactory`].
    id: u64,
    /// Ties the identity to `T` without storing a value of that type.
    _phantom: PhantomData<T>,
}
914
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass this framebuffer will be used with.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// Dimensions of the framebuffer.
    extent: wgt::Extent3d,
}
928
929impl FramebufferKey {
930    fn push_view(&mut self, view: IdentifiedTextureView) {
931        self.attachment_identities.push(view.identity);
932        self.attachment_views.push(view.raw);
933    }
934}
935
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Program-lifetime-unique identity for `raw` (raw handles may be reused
    /// by the driver after destruction).
    identity: ResourceIdentity<vk::ImageView>,
}
942
/// Hash-map key identifying a temporary texture view cached by a `CommandEncoder`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
954
pub struct CommandEncoder {
    /// The command pool all of this encoder's buffers are allocated from.
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Cache of framebuffers created by this encoder; destroyed when the
    /// encoder is dropped (see the `Drop` impl).
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Cache of temporary image views created by this encoder; destroyed when
    /// the encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared instrumentation counters; the live-encoder count is decremented
    /// on drop.
    counters: Arc<wgt::HalCounters>,

    /// Whether the most recently bound render pipeline was created with
    /// multiview enabled (presumably set when binding — verify in command.rs).
    current_pipeline_is_multiview: bool,
}
998
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // The cached framebuffers and temporary image views are owned by this
        // encoder, not by the command pool, so they must be destroyed
        // explicitly.
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        // Keep the HAL's live-object accounting in sync.
        self.counters.command_encoders.sub(1);
    }
}
1029
impl CommandEncoder {
    /// Returns the raw handle of the currently active command buffer
    /// (null when the encoder is not in the recording state).
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1038
1039impl fmt::Debug for CommandEncoder {
1040    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1041        f.debug_struct("CommandEncoder")
1042            .field("raw", &self.raw)
1043            .finish()
1044    }
1045}
1046
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw command buffer; its storage is owned and freed by the
    /// `CommandEncoder`'s pool that allocated it.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1053
/// A shader module: either ready-made SPIR-V, or Naga IR still awaiting
/// translation (presumably at pipeline creation time — see device.rs).
#[derive(Debug)]
pub enum ShaderModule {
    /// An already-compiled SPIR-V module handle.
    Raw(vk::ShaderModule),
    /// Naga IR plus the runtime-check settings to apply when translating it.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1064
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    /// Whether this pipeline was created for multiview rendering.
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1072
/// A compute pipeline; a thin wrapper over the raw Vulkan handle.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1079
/// A pipeline cache; a thin wrapper over the raw Vulkan handle.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1086
/// A query set; a thin wrapper over the raw Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1093
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest fence value known to have completed.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Reset fences, ready for reuse by future submissions.
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1144
1145impl Fence {
1146    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1147    ///
1148    /// As an optimization, assume that we already know that the fence has
1149    /// reached `last_completed`, and don't bother checking fences whose values
1150    /// are less than that: those fences remain in the `active` array only
1151    /// because we haven't called `maintain` yet to clean them up.
1152    ///
1153    /// [`FenceValue`]: crate::FenceValue
1154    fn check_active(
1155        device: &ash::Device,
1156        mut last_completed: crate::FenceValue,
1157        active: &[(crate::FenceValue, vk::Fence)],
1158    ) -> Result<crate::FenceValue, crate::DeviceError> {
1159        for &(value, raw) in active.iter() {
1160            unsafe {
1161                if value > last_completed
1162                    && device
1163                        .get_fence_status(raw)
1164                        .map_err(map_host_device_oom_and_lost_err)?
1165                {
1166                    last_completed = value;
1167                }
1168            }
1169        }
1170        Ok(last_completed)
1171    }
1172
1173    /// Return the highest signalled [`FenceValue`] for `self`.
1174    ///
1175    /// [`FenceValue`]: crate::FenceValue
1176    fn get_latest(
1177        &self,
1178        device: &ash::Device,
1179        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1180    ) -> Result<crate::FenceValue, crate::DeviceError> {
1181        match *self {
1182            Self::TimelineSemaphore(raw) => unsafe {
1183                Ok(match *extension.unwrap() {
1184                    ExtensionFn::Extension(ref ext) => ext
1185                        .get_semaphore_counter_value(raw)
1186                        .map_err(map_host_device_oom_and_lost_err)?,
1187                    ExtensionFn::Promoted => device
1188                        .get_semaphore_counter_value(raw)
1189                        .map_err(map_host_device_oom_and_lost_err)?,
1190                })
1191            },
1192            Self::FencePool {
1193                last_completed,
1194                ref active,
1195                free: _,
1196            } => Self::check_active(device, last_completed, active),
1197        }
1198    }
1199
1200    /// Trim the internal state of this [`Fence`].
1201    ///
1202    /// This function has no externally visible effect, but you should call it
1203    /// periodically to keep this fence's resource consumption under control.
1204    ///
1205    /// For fences using the [`FencePool`] implementation, this function
1206    /// recycles fences that have been signaled. If you don't call this,
1207    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1208    /// time it's called.
1209    ///
1210    /// [`FencePool`]: Fence::FencePool
1211    /// [`Queue::submit`]: crate::Queue::submit
1212    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1213        match *self {
1214            Self::TimelineSemaphore(_) => {}
1215            Self::FencePool {
1216                ref mut last_completed,
1217                ref mut active,
1218                ref mut free,
1219            } => {
1220                let latest = Self::check_active(device, *last_completed, active)?;
1221                let base_free = free.len();
1222                for &(value, raw) in active.iter() {
1223                    if value <= latest {
1224                        free.push(raw);
1225                    }
1226                }
1227                if free.len() != base_free {
1228                    active.retain(|&(value, _)| value > latest);
1229                    unsafe { device.reset_fences(&free[base_free..]) }
1230                        .map_err(map_device_oom_err)?
1231                }
1232                *last_completed = latest;
1233            }
1234        }
1235        Ok(())
1236    }
1237}
1238
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless the fence is a `FencePool`; timeline semaphores
        // are signalled via the semaphore list instead.
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Fold in any externally queued signal semaphores (see
        // `Queue::add_signal_semaphore`); `append` drains the shared list.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is available; otherwise
                // allocate a fresh one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        // Collect the raw handles for the submit info.
        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        // Backing storage for the timeline-semaphore submit info; written by
        // `add_to_submit` when needed, so it must outlive `queue_submit`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        // Cached on `DeviceShared` at device creation (presumably from the
        // physical-device limits — see adapter.rs).
        self.device.timestamp_period
    }

    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}
1374
1375impl Queue {
1376    pub fn raw_device(&self) -> &ash::Device {
1377        &self.device.raw
1378    }
1379
1380    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1381        let mut guard = self.signal_semaphores.lock();
1382        if let Some(value) = semaphore_value {
1383            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1384        } else {
1385            guard.push_signal(SemaphoreType::Binary(semaphore));
1386        }
1387    }
1388
1389    /// Remove `semaphore` from the pending signal list if it is still present.
1390    ///
1391    /// Returns `true` if the semaphore was found and removed. If the submit
1392    /// already consumed it, this is a harmless no-op that returns `false`.
1393    pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1394        self.signal_semaphores.lock().remove(semaphore)
1395    }
1396}
1397
1398/// Maps
1399///
1400/// - VK_ERROR_OUT_OF_HOST_MEMORY
1401/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1402fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1403    match err {
1404        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1405            get_oom_err(err)
1406        }
1407        e => get_unexpected_err(e),
1408    }
1409}
1410
1411/// Maps
1412///
1413/// - VK_ERROR_OUT_OF_HOST_MEMORY
1414/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1415/// - VK_ERROR_DEVICE_LOST
1416fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1417    match err {
1418        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1419        other => map_host_device_oom_err(other),
1420    }
1421}
1422
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// Since IOCA cannot occur without the extension, this simply defers to
/// [`map_host_device_oom_err`].
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1433
1434/// Maps
1435///
1436/// - VK_ERROR_OUT_OF_HOST_MEMORY
1437fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1438    match err {
1439        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1440        e => get_unexpected_err(e),
1441    }
1442}
1443
1444/// Maps
1445///
1446/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1447fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1448    match err {
1449        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1450        e => get_unexpected_err(e),
1451    }
1452}
1453
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// Since IOCA cannot occur without the extension, this simply defers to
/// [`map_host_oom_err`].
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1463
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// The extension-specific codes cannot occur here, so this simply defers to
/// [`map_host_device_oom_err`].
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1477
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // Unreachable when the panic above is compiled in, hence the allow.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1487
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The original error is accepted (and ignored) so the `map_*` helpers above
/// can forward it uniformly.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1492
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // Unreachable when the panic above is compiled in, hence the allow.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1502
/// CPU-side instance record for a top-level acceleration structure.
///
/// NOTE(review): the field names and `#[repr(C)]` layout appear to mirror
/// Vulkan's `VkAccelerationStructureInstanceKHR` — verify against the spec
/// when changing.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// 3x4 transform matrix (row-major per the Vulkan spec — confirm).
    transform: [f32; 12],
    /// Packed instance custom index and visibility mask (bit split per the
    /// Vulkan spec — confirm).
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    /// Device address or handle of the referenced BLAS.
    acceleration_structure_reference: u64,
}
1511
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1533
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything the device does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1543
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1563
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not enable anything the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;