wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in one of two ways:
 - temporarily allocating a `Vec` on the heap, where the overhead is permitted
 - growing temporary local storage
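
For example, a minimal sketch of the second approach (the bindings here are
illustrative, not an exact call site from this module):

```ignore
// A fixed-capacity ArrayVec lives in local storage, yet still hands ash the
// slice it expects, with no heap allocation.
let mut views: arrayvec::ArrayVec<vk::ImageView, MAX_TOTAL_ATTACHMENTS> =
    arrayvec::ArrayVec::new();
views.push(vk::ImageView::null());
let attachments: &[vk::ImageView] = &views;
```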

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
pub mod conv;
mod device;
mod drm;
mod instance;
mod sampler;
mod semaphore_list;
mod swapchain;

pub use adapter::PhysicalDeviceFeatures;

use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
use core::{
    borrow::Borrow,
    ffi::CStr,
    fmt,
    marker::PhantomData,
    mem::{self, ManuallyDrop},
    num::NonZeroU32,
};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};

const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
63
64#[derive(Clone, Debug)]
65pub struct Api;
66
67impl crate::Api for Api {
68 const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
69
70 type Instance = Instance;
71 type Surface = Surface;
72 type Adapter = Adapter;
73 type Device = Device;
74
75 type Queue = Queue;
76 type CommandEncoder = CommandEncoder;
77 type CommandBuffer = CommandBuffer;
78
79 type Buffer = Buffer;
80 type Texture = Texture;
81 type SurfaceTexture = SurfaceTexture;
82 type TextureView = TextureView;
83 type Sampler = Sampler;
84 type QuerySet = QuerySet;
85 type Fence = Fence;
86 type AccelerationStructure = AccelerationStructure;
87 type PipelineCache = PipelineCache;
88
89 type BindGroupLayout = BindGroupLayout;
90 type BindGroup = BindGroup;
91 type PipelineLayout = PipelineLayout;
92 type ShaderModule = ShaderModule;
93 type RenderPipeline = RenderPipeline;
94 type ComputePipeline = ComputePipeline;
95}
96
97crate::impl_dyn_resource!(
98 Adapter,
99 AccelerationStructure,
100 BindGroup,
101 BindGroupLayout,
102 Buffer,
103 CommandBuffer,
104 CommandEncoder,
105 ComputePipeline,
106 Device,
107 Fence,
108 Instance,
109 PipelineCache,
110 PipelineLayout,
111 QuerySet,
112 Queue,
113 RenderPipeline,
114 Sampler,
115 ShaderModule,
116 Surface,
117 SurfaceTexture,
118 Texture,
119 TextureView
120);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Properties of the validation layer that the debug messenger
/// needs in order to apply its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// their layer, so there's no reason to track the version.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

pub struct Surface {
    inner: ManuallyDrop<Box<dyn swapchain::Surface>>,
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
}
impl Surface {
    /// Returns the raw Vulkan surface handle.
    ///
    /// Returns `None` if the surface is a DXGI surface.
    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
        Some(
            self.inner
                .as_any()
                .downcast_ref::<swapchain::NativeSurface>()?
                .as_raw(),
        )
    }

    /// Returns the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured or if the swapchain
    /// is a DXGI swapchain.
    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        Some(
            read.as_ref()?
                .as_any()
                .downcast_ref::<swapchain::NativeSwapchain>()?
                .as_raw(),
        )
    }

    /// Sets the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API; you should access it manually through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the surface has been configured for a DXGI swapchain.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
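    ///
    /// A minimal usage sketch (the `surface` binding and the timing values
    /// are illustrative assumptions, not part of this API):
    ///
    /// ```ignore
    /// // Tag the next present with id 1 and no "not before" constraint.
    /// surface.set_next_present_time(vk::PresentTimeGOOGLE {
    ///     present_id: 1,
    ///     desired_present_time: 0,
    /// });
    /// ```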
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        swapchain
            .as_mut()
            .expect("Surface should have been configured")
            .as_any_mut()
            .downcast_mut::<swapchain::NativeSwapchain>()
            .expect("Surface should have a native Vulkan swapchain")
            .set_next_present_time(present_timing);
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified (the function pointers
// should all be the same), but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the
    /// functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// Maximum instance count to allow when a multiview pipeline is bound.
    ///
    /// This exists to panic before undefined behavior, and is imperfect. To allow
    /// implementations to emulate multiview using instancing, drawing `n` instances
    /// to two views (as in VR) requires drawing `2n` instances internally, but no
    /// draw may ever exceed `u32::MAX` instances. Therefore, when drawing multiview,
    /// some Vulkan implementations restrict the instance count, which isn't normally
    /// a limit in WebGPU. We don't expose this limit because it's obscure: it only
    /// occurs on certain Vulkan implementations when drawing more than roughly
    /// 128 million instances. We still want to avoid undefined behavior in this
    /// situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// `BufferUsages::ACCELERATION_STRUCTURE_SCRATCH` allows a buffer to be used as
    /// a scratch buffer. Vulkan has no way to specify this as a usage, so it maps to
    /// other usages, but those usages do not impose an alignment requirement as high
    /// as the one needed when using the buffer as a scratch buffer while building
    /// acceleration structures.
    scratch_buffer_alignment: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to `vkCmdFillBuffer` are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview_mask: Option<NonZeroU32>,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we cache framebuffers, which are not deleted until the device is
    /// destroyed, and because the Vulkan implementation may reuse handles, we
    /// need some way to differentiate between an old handle and a new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last, so keep the device last to
    // ensure that all resources that depend on it (like `mem_allocator`) are
    // destroyed before it.
    shared: Arc<DeviceShared>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we used a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
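///
/// A sketch of the resulting submission chain (illustrative only; `s0` and
/// `s1` stand for the two alternating semaphores):
///
/// ```text
/// submission 0: wait on nothing, signal s0
/// submission 1: wait on s0,      signal s1
/// submission 2: wait on s1,      signal s0
/// ```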
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // Whatever this submission signals, the next should wait on.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}

#[derive(Debug)]
enum BufferMemoryBacking {
    Managed(gpu_allocator::vulkan::Allocation),
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}

impl BufferMemoryBacking {
    fn memory(&self) -> vk::DeviceMemory {
        match self {
            Self::Managed(m) => unsafe { m.memory() },
            Self::VulkanMemory { memory, .. } => *memory,
        }
    }

    fn offset(&self) -> u64 {
        match self {
            Self::Managed(m) => m.offset(),
            Self::VulkanMemory { offset, .. } => *offset,
        }
    }

    fn size(&self) -> u64 {
        match self {
            Self::Managed(m) => m.size(),
            Self::VulkanMemory { size, .. } => *size,
        }
    }
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    allocation: Option<Mutex<BufferMemoryBacking>>,
}

impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            allocation: None,
        }
    }

    /// # Safety
    ///
    /// - We will use this buffer and the buffer's backing memory range as if we
    ///   have exclusive ownership over it, until the wgpu resource is dropped
    ///   and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid within the allocation of `memory`
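    ///
    /// A hedged usage sketch (assumes `vk_buffer` and `memory` were created by
    /// the caller through [`ash`]; the offset and size are illustrative):
    ///
    /// ```ignore
    /// // Wrap an externally allocated buffer so wgpu-hal can track its backing range.
    /// let buffer = unsafe { Buffer::from_raw_managed(vk_buffer, memory, 0, 256) };
    /// ```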
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub enum TextureMemory {
    /// Shared memory in the GPU allocator (owned by wgpu-hal).
    Allocation(gpu_allocator::vulkan::Allocation),

    /// Dedicated memory (owned by wgpu-hal).
    Dedicated(vk::DeviceMemory),

    /// Memory not owned by wgpu.
    External,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_allocator::vulkan::Allocation` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    texture_identity: ResourceIdentity<vk::Image>,
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

/// Generates unique IDs for each resource of type `T`.
///
/// Because Vulkan handles may be reused and so are not permanently unique,
/// this provides a way to generate IDs for each resource that are.
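///
/// A minimal usage sketch (illustrative; this type is internal to the backend):
///
/// ```ignore
/// let factory = ResourceIdentityFactory::<vk::Image>::new();
/// let first = factory.next();
/// let second = factory.next();
/// // IDs stay distinct even if Vulkan later reuses the underlying handles.
/// assert_ne!(first, second);
/// ```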
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, and remains
/// unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hash map, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active render pass enabled a debug span,
    /// and it needs to be disabled when the render pass closes.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,

    current_pipeline_is_multiview: bool,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
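    ///
    /// A hedged sketch of the intended call pattern (assumes a `fence` and a
    /// raw `ash::Device` are in scope; the names are illustrative):
    ///
    /// ```ignore
    /// // Once per frame or so: recycle signalled VkFences back into the pool.
    /// fence.maintain(&device)?;
    /// ```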
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the boxes by pointer, as `Eq` isn't well defined for `SurfaceSemaphores`.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl Queue {
    /// Returns the raw [`ash::Device`] backing this queue.
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

    /// Adds a semaphore to be signalled by the next submission.
    ///
    /// If `semaphore_value` is `Some`, `semaphore` is treated as a timeline
    /// semaphore and will be signalled with the given value; otherwise it is
    /// treated as a binary semaphore.
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
        } else {
            guard.push_signal(SemaphoreType::Binary(semaphore));
        }
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}

#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}

/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything that the device does not support.
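///
/// A hedged usage sketch (the extension chosen is illustrative; any
/// `&'static CStr` extension name from `ash` works):
///
/// ```ignore
/// let callback: Box<CreateDeviceCallback> = Box::new(|args| {
///     // Request an extra device extension on top of what wgpu enables.
///     args.extensions.push(ash::khr::swapchain::NAME);
/// });
/// ```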
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;

/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything that the instance does not support.
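///
/// A hedged usage sketch (the extension chosen is illustrative; any
/// `&'static CStr` extension name from `ash` works):
///
/// ```ignore
/// let callback: Box<CreateInstanceCallback> = Box::new(|args| {
///     // Request an extra instance extension on top of what wgpu enables.
///     args.extensions.push(ash::ext::debug_utils::NAME);
/// });
/// ```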
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;