wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in a combination of ways:
 - temporarily allocating `Vec` on heap, where overhead is permitted
 - growing temporary local storage (see the sketch below)
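
For example, attachment lists can be gathered in fixed-size local storage and
handed to `ash` as a slice. A sketch (`MAX_TOTAL_ATTACHMENTS` is this module's
attachment bound; `raw_view` stands in for any `vk::ImageView`):

```ignore
let mut views = arrayvec::ArrayVec::<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>::new();
views.push(raw_view); // no heap allocation; `&views` coerces to a slice for ash
```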

## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
pub mod conv;
mod device;
mod drm;
mod instance;
mod sampler;
mod semaphore_list;
mod swapchain;

pub use adapter::PhysicalDeviceFeatures;

use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
use core::{
    borrow::Borrow,
    ffi::CStr,
    fmt,
    marker::PhantomData,
    mem::{self, ManuallyDrop},
    num::NonZeroU32,
};

use arrayvec::ArrayVec;
use ash::{ext, khr, vk};
use bytemuck::{Pod, Zeroable};
use hashbrown::HashSet;
use parking_lot::{Mutex, RwLock};

use naga::FastHashMap;
use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};

const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);

struct DebugUtils {
    extension: ext::debug_utils::Instance,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// The properties of the validation layer that the `DebugUtilsMessenger`
/// needs for its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// their layer, so there's no reason to track the version.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// That is, the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

pub struct Surface {
    inner: ManuallyDrop<Box<dyn swapchain::Surface>>,
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
}

impl Surface {
    /// Returns the raw Vulkan surface handle.
    ///
    /// Returns `None` if the surface is a DXGI surface.
    pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
        Some(
            self.inner
                .as_any()
                .downcast_ref::<swapchain::NativeSurface>()?
                .as_raw(),
        )
    }

    /// Get the raw Vulkan swapchain associated with this surface.
    ///
    /// Returns [`None`] if the surface is not configured or if the swapchain
    /// is a DXGI swapchain.
    pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
        let read = self.swapchain.read();
        Some(
            read.as_ref()?
                .as_any()
                .downcast_ref::<swapchain::NativeSwapchain>()?
                .as_raw(),
        )
    }

    /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
    /// using [VK_GOOGLE_display_timing].
    ///
    /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
    /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
    ///
    /// This can also be used to add a "not before" timestamp to the presentation.
    ///
    /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
    ///
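    /// # Examples
    ///
    /// A minimal sketch (not compiled as a doctest; `surface` is a configured
    /// `vulkan::Surface`):
    ///
    /// ```ignore
    /// surface.set_next_present_time(ash::vk::PresentTimeGOOGLE {
    ///     present_id: 1,           // id to correlate with past-presentation timing queries
    ///     desired_present_time: 0, // 0 means no "not before" constraint
    /// });
    /// ```
    ///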
    /// # Panics
    ///
    /// - If the surface hasn't been configured.
    /// - If the surface has been configured for a DXGI swapchain.
    /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
    ///
    /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
    #[track_caller]
    pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
        let mut swapchain = self.swapchain.write();
        swapchain
            .as_mut()
            .expect("Surface should have been configured")
            .as_any_mut()
            .downcast_mut::<swapchain::NativeSwapchain>()
            .expect("Surface should have a native Vulkan swapchain")
            .set_next_present_time(present_timing);
    }
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified--the function pointers should all be
// the same--but it's not clear how to do that with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
    #[cfg_attr(not(unix), allow(dead_code))]
    external_memory_fd: Option<khr::external_memory_fd::Device>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device capabilities, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// A limit on the number of instances that may be drawn when rendering with
    /// multiview; checked so we can panic before hitting undefined behavior
    /// (an imperfect guard).
    ///
    /// To allow implementations to emulate multiview using instancing, drawing
    /// `n` instances to a two-view (VR) target may actually draw `2n` instances,
    /// and no draw can ever exceed `u32::MAX` instances. Therefore, when drawing
    /// multiview, some Vulkan implementations restrict the instance count, which
    /// isn't usually a thing in WebGPU. We don't expose this limit because it's
    /// obscure: it only bites on certain Vulkan implementations when drawing
    /// more than roughly 128 million instances. We still want to avoid undefined
    /// behavior in that situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// `BufferUsages::ACCELERATION_STRUCTURE_SCRATCH` allows a buffer to be used
    /// as a scratch buffer when building acceleration structures. Vulkan has no
    /// way to specify this as a usage, so it maps to other usages, but those
    /// usages impose a lower alignment requirement on the buffer than scratch
    /// use does; this field holds the stricter scratch alignment.
    scratch_buffer_alignment: u32,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview_mask: Option<NonZeroU32>,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we cache framebuffers and don't delete them until the device is
    /// destroyed, if the Vulkan implementation re-uses handles we need some way
    /// to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    empty_descriptor_set_layout: vk::DescriptorSetLayout,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl Drop for DeviceShared {
    fn drop(&mut self) {
        for &raw in self.render_passes.lock().values() {
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        unsafe {
            self.raw
                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
        };
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}

pub struct Device {
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last; keep the device last so that
    // all resources that depend on it, such as `mem_allocator`, are destroyed first.
    shared: Arc<DeviceShared>,
}

impl Drop for Device {
    fn drop(&mut self) {
        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
    }
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
#[derive(Debug)]
enum BufferMemoryBacking {
    Managed(gpu_allocator::vulkan::Allocation),
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}

impl BufferMemoryBacking {
    fn memory(&self) -> vk::DeviceMemory {
        match self {
            Self::Managed(m) => unsafe { m.memory() },
            Self::VulkanMemory { memory, .. } => *memory,
        }
    }

    fn offset(&self) -> u64 {
        match self {
            Self::Managed(m) => m.offset(),
            Self::VulkanMemory { offset, .. } => *offset,
        }
    }

    fn size(&self) -> u64 {
        match self {
            Self::Managed(m) => m.size(),
            Self::VulkanMemory { size, .. } => *size,
        }
    }
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    allocation: Option<Mutex<BufferMemoryBacking>>,
}

impl Buffer {
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            allocation: None,
        }
    }

    /// # Safety
    ///
    /// - We will use this buffer and the buffer's backing memory range as if we
    ///   have exclusive ownership over them, until the wgpu resource is dropped
    ///   and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must describe a valid range within the allocation of `memory`
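    ///
    /// A minimal sketch (not a doctest; `vk_buffer`, `memory`, and `size` come
    /// from the caller's own Vulkan allocation code):
    ///
    /// ```ignore
    /// let buffer = unsafe { Buffer::from_raw_managed(vk_buffer, memory, 0, size) };
    /// ```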
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            allocation: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

#[derive(Debug)]
pub enum TextureMemory {
    /// Shared memory in the GPU allocator (owned by wgpu-hal).
    Allocation(gpu_allocator::vulkan::Allocation),

    /// Dedicated memory (owned by wgpu-hal).
    Dedicated(vk::DeviceMemory),

    /// Memory not owned by wgpu.
    External,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_allocator::vulkan::Allocation` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    texture_identity: ResourceIdentity<vk::Image>,
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
    }

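    /// Builds a NUL-terminated `CStr` from `name` in the recycled `marker`
    /// storage, avoiding a fresh allocation for each debug marker.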
    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

/// Generates unique IDs for each resource of type `T`.
///
/// Because Vulkan handles are not permanently unique, this provides
/// a way to tell resources apart even when their handles are reused.
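///
/// A minimal sketch of the intended use:
///
/// ```ignore
/// let factory = ResourceIdentityFactory::<vk::Image>::new();
/// let a = factory.next();
/// let b = factory.next();
/// assert_ne!(a, b); // distinct even if Vulkan later reuses an image handle
/// ```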
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources; it remains
/// unique for the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}

impl FramebufferKey {
    fn push_view(&mut self, view: IdentifiedTextureView) {
        self.attachment_identities.push(view.identity);
        self.attachment_views.push(view.raw);
    }
}

/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    identity: ResourceIdentity<vk::ImageView>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// which needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,

    current_pipeline_is_multiview: bool,
}

impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}

#[derive(Debug)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
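    ///
    /// For example (a sketch): with `last_completed == 2` and `active ==
    /// [(3, a), (4, b)]`, the effective value is `4` as soon as `b` is
    /// signalled, regardless of the state of `a`.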
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}

impl Queue {
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_signal(SemaphoreType::Timeline(semaphore, value));
        } else {
            guard.push_signal(SemaphoreType::Binary(semaphore));
        }
    }

    /// Remove `semaphore` from the pending signal list if it is still present.
    ///
    /// Returns `true` if the semaphore was found and removed. If the submit
    /// already consumed it, this is a harmless no-op that returns `false`.
    pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
        self.signal_semaphores.lock().remove(semaphore)
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
            get_oom_err(err)
        }
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_DEVICE_LOST
fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
        other => map_host_device_oom_err(other),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
    match err {
        vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
        e => get_unexpected_err(e),
    }
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}

/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}

/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}

/// Returns [`crate::DeviceError::OutOfMemory`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}

/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}

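/// CPU-side instance record used when building top-level acceleration
/// structures, laid out to match Vulkan's `VkAccelerationStructureInstanceKHR`.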
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    transform: [f32; 12],
    custom_data_and_mask: u32,
    shader_binding_table_record_offset_and_flags: u32,
    acceleration_structure_reference: u64,
}

/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to those members will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info's value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
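///
/// A minimal sketch (hedged: how the callback is passed to device creation
/// depends on the caller, and `external_memory_fd` is just an example extension):
///
/// ```ignore
/// let callback: Box<CreateDeviceCallback> = Box::new(|args: CreateDeviceCallbackArgs| {
///     // Request one extra device extension; everything else is left untouched.
///     args.extensions.push(ash::khr::external_memory_fd::NAME);
/// });
/// ```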
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;

/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions, as
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
///
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info's value will be overwritten.
/// - The callback must not remove features.
/// - The callback must not enable anything the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;