wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but a framebuffer is removed
when any of the image views it references is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod descriptor;
31mod device;
32mod drm;
33mod instance;
34mod sampler;
35mod semaphore_list;
36mod swapchain;
37
38pub use adapter::PhysicalDeviceFeatures;
39
40use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
41use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use bytemuck::{Pod, Zeroable};
46use hashbrown::HashSet;
47use parking_lot::{Mutex, RwLock};
48
49use naga::FastHashMap;
50use wgt::InternalCounter;
51
52use semaphore_list::SemaphoreList;
53
54use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
55
56const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
57
58#[derive(Clone, Debug)]
59pub struct Api;
60
61impl crate::Api for Api {
62 const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
63
64 type Instance = Instance;
65 type Surface = Surface;
66 type Adapter = Adapter;
67 type Device = Device;
68
69 type Queue = Queue;
70 type CommandEncoder = CommandEncoder;
71 type CommandBuffer = CommandBuffer;
72
73 type Buffer = Buffer;
74 type Texture = Texture;
75 type SurfaceTexture = SurfaceTexture;
76 type TextureView = TextureView;
77 type Sampler = Sampler;
78 type QuerySet = QuerySet;
79 type Fence = Fence;
80 type AccelerationStructure = AccelerationStructure;
81 type PipelineCache = PipelineCache;
82
83 type BindGroupLayout = BindGroupLayout;
84 type BindGroup = BindGroup;
85 type PipelineLayout = PipelineLayout;
86 type ShaderModule = ShaderModule;
87 type RenderPipeline = RenderPipeline;
88 type ComputePipeline = ComputePipeline;
89}
90
91crate::impl_dyn_resource!(
92 Adapter,
93 AccelerationStructure,
94 BindGroup,
95 BindGroupLayout,
96 Buffer,
97 CommandBuffer,
98 CommandEncoder,
99 ComputePipeline,
100 Device,
101 Fence,
102 Instance,
103 PipelineCache,
104 PipelineLayout,
105 QuerySet,
106 Queue,
107 RenderPipeline,
108 Sampler,
109 ShaderModule,
110 Surface,
111 SurfaceTexture,
112 Texture,
113 TextureView
114);
115
116struct DebugUtils {
117 extension: ext::debug_utils::Instance,
118 messenger: vk::DebugUtilsMessengerEXT,
119
120 /// Owning pointer to the debug messenger callback user data.
121 ///
122 /// `InstanceShared::drop` destroys the debug messenger before
123 /// dropping this, so the callback should never receive a dangling
124 /// user data pointer.
125 #[allow(dead_code)]
126 callback_data: Box<DebugUtilsMessengerUserData>,
127}
128
129pub struct DebugUtilsCreateInfo {
130 severity: vk::DebugUtilsMessageSeverityFlagsEXT,
131 message_type: vk::DebugUtilsMessageTypeFlagsEXT,
132 callback_data: Box<DebugUtilsMessengerUserData>,
133}
134
/// Validation-layer properties that the `DebugUtilsMessenger` needs for its
/// workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Description of the validation layer, taken from `vk::LayerProperties`.
    layer_description: CString,

    /// Specification version of the validation layer, taken from
    /// `vk::LayerProperties`.
    layer_spec_version: u32,
}
145
146/// User data needed by `instance::debug_utils_messenger_callback`.
147///
148/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
149/// pointer refers to one of these values.
150#[derive(Debug)]
151pub struct DebugUtilsMessengerUserData {
152 /// The properties related to the validation layer, if present
153 validation_layer_properties: Option<ValidationLayerProperties>,
154
155 /// If the OBS layer is present. OBS never increments the version of their layer,
156 /// so there's no reason to have the version.
157 has_obs_layer: bool,
158}
159
160pub struct InstanceShared {
161 raw: ash::Instance,
162 extensions: Vec<&'static CStr>,
163 flags: wgt::InstanceFlags,
164 memory_budget_thresholds: wgt::MemoryBudgetThresholds,
165 debug_utils: Option<DebugUtils>,
166 get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
167 entry: ash::Entry,
168 has_nv_optimus: bool,
169 android_sdk_version: u32,
170 /// The instance API version.
171 ///
172 /// Which is the version of Vulkan supported for instance-level functionality.
173 ///
174 /// It is associated with a `VkInstance` and its children,
175 /// except for a `VkPhysicalDevice` and its children.
176 instance_api_version: u32,
177
178 // The `drop_guard` field must be the last field of this struct so it is dropped last.
179 // Do not add new fields after it.
180 drop_guard: Option<crate::DropGuard>,
181}
182
183pub struct Instance {
184 shared: Arc<InstanceShared>,
185}
186
187pub struct Surface {
188 swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
189 inner: Box<dyn swapchain::Surface>,
190}
191
192impl Surface {
193 /// Returns the raw Vulkan surface handle.
194 ///
195 /// Returns `None` if the surface is a DXGI surface.
196 pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
197 Some(
198 self.inner
199 .as_any()
200 .downcast_ref::<swapchain::NativeSurface>()?
201 .as_raw(),
202 )
203 }
204
205 /// Get the raw Vulkan swapchain associated with this surface.
206 ///
207 /// Returns [`None`] if the surface is not configured or if the swapchain
208 /// is a DXGI swapchain.
209 pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
210 let read = self.swapchain.read();
211 Some(
212 read.as_ref()?
213 .as_any()
214 .downcast_ref::<swapchain::NativeSwapchain>()?
215 .as_raw(),
216 )
217 }
218
219 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
220 /// using [VK_GOOGLE_display_timing].
221 ///
222 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
223 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
224 ///
225 /// This can also be used to add a "not before" timestamp to the presentation.
226 ///
227 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
228 ///
229 /// # Panics
230 ///
231 /// - If the surface hasn't been configured.
232 /// - If the surface has been configured for a DXGI swapchain.
233 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
234 ///
235 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
236 #[track_caller]
237 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
238 let mut swapchain = self.swapchain.write();
239 swapchain
240 .as_mut()
241 .expect("Surface should have been configured")
242 .as_any_mut()
243 .downcast_mut::<swapchain::NativeSwapchain>()
244 .expect("Surface should have a native Vulkan swapchain")
245 .set_next_present_time(present_timing);
246 }
247}
248
249#[derive(Debug)]
250pub struct SurfaceTexture {
251 index: u32,
252 texture: Texture,
253 metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
254}
255
256impl crate::DynSurfaceTexture for SurfaceTexture {}
257
258impl Borrow<Texture> for SurfaceTexture {
259 fn borrow(&self) -> &Texture {
260 &self.texture
261 }
262}
263
264impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
265 fn borrow(&self) -> &dyn crate::DynTexture {
266 &self.texture
267 }
268}
269
270pub struct Adapter {
271 raw: vk::PhysicalDevice,
272 instance: Arc<InstanceShared>,
273 //queue_families: Vec<vk::QueueFamilyProperties>,
274 known_memory_flags: vk::MemoryPropertyFlags,
275 phd_capabilities: adapter::PhysicalDeviceProperties,
276 phd_features: PhysicalDeviceFeatures,
277 downlevel_flags: wgt::DownlevelFlags,
278 private_caps: PrivateCapabilities,
279 workarounds: Workarounds,
280}
281
// TODO: nothing should prevent unifying these cases, since the function
// pointers should all be the same; it is just unclear how to do it with `ash`.
/// Where the functions of a (possibly promoted) extension come from.
enum ExtensionFn<T> {
    /// The function pointer struct loaded for the extension.
    Extension(T),
    /// The extension was promoted to a core Vulkan version, so the functions
    /// on `ash`'s `DeviceV1_x` traits should be used instead.
    Promoted,
}
289
290struct DeviceExtensionFunctions {
291 debug_utils: Option<ext::debug_utils::Device>,
292 draw_indirect_count: Option<khr::draw_indirect_count::Device>,
293 timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
294 ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
295 mesh_shading: Option<ext::mesh_shader::Device>,
296 #[cfg_attr(not(unix), allow(dead_code))]
297 external_memory_fd: Option<khr::external_memory_fd::Device>,
298}
299
300struct RayTracingDeviceExtensionFunctions {
301 acceleration_structure: khr::acceleration_structure::Device,
302 buffer_device_address: khr::buffer_device_address::Device,
303}
304
305/// Set of internal capabilities, which don't show up in the exposed
306/// device geometry, but affect the code paths taken internally.
307#[derive(Clone, Debug)]
308struct PrivateCapabilities {
309 image_view_usage: bool,
310 timeline_semaphores: bool,
311 texture_d24: bool,
312 texture_d24_s8: bool,
313 texture_s8: bool,
314 /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
315 can_present: bool,
316 non_coherent_map_mask: wgt::BufferAddress,
317 multi_draw_indirect: bool,
318 max_draw_indirect_count: u32,
319
320 /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
321 ///
322 /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
323 /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
324 /// a given bindgroup binding outside that binding's [accessible
325 /// region][ar]. Enabling `robustBufferAccess` does ensure that
326 /// out-of-bounds reads and writes are not undefined behavior (that's good),
327 /// but still permits out-of-bounds reads to return data from anywhere
328 /// within the buffer, not just the accessible region.
329 ///
330 /// [ar]: ../struct.BufferBinding.html#accessible-region
331 /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
332 robust_buffer_access: bool,
333
334 robust_image_access: bool,
335
336 /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
337 /// [`robustBufferAccess2`] feature.
338 ///
339 /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
340 /// shader accesses to buffer contents. If this feature is not available,
341 /// this backend must have Naga inject bounds checks in the generated
342 /// SPIR-V.
343 ///
344 /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
345 /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
346 /// [ar]: ../struct.BufferBinding.html#accessible-region
347 robust_buffer_access2: bool,
348
349 robust_image_access2: bool,
350 zero_initialize_workgroup_memory: bool,
351 image_format_list: bool,
352 maximum_samplers: u32,
353
354 /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
355 /// (promoted to Vulkan 1.3).
356 ///
357 /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
358 ///
359 /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
360 shader_integer_dot_product: bool,
361
362 /// True if this adapter supports 8-bit integers provided by the
363 /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
364 ///
365 /// Allows shaders to declare the "Int8" capability. Note, however, that this
366 /// feature alone allows the use of 8-bit integers "only in the `Private`,
367 /// `Workgroup` (for non-Block variables), and `Function` storage classes"
368 /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
369 /// `StorageBuffer`), you also need to enable the corresponding feature in
370 /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
371 /// capability (e.g., `StorageBuffer8BitAccess`).
372 ///
373 /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
374 /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
375 shader_int8: bool,
376
377 /// This is done to panic before undefined behavior, and is imperfect.
378 /// Basically, to allow implementations to emulate mv using instancing, if you
379 /// want to draw `n` instances to VR, you must draw `2n` instances, but you
380 /// can never draw more than `u32::MAX` instances. Therefore, when drawing
381 /// multiview on some vulkan implementations, it might restrict the instance
382 /// count, which isn't usually a thing in webgpu. We don't expose this limit
383 /// because its strange, i.e. only occurs on certain vulkan implementations
384 /// if you are drawing more than 128 million instances. We still want to avoid
385 /// undefined behavior in this situation, so we panic if the limit is violated.
386 multiview_instance_index_limit: u32,
387
388 /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
389 /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
390 /// these usages do not have as high of an alignment requirement using the buffer as
391 /// a scratch buffer when building acceleration structures.
392 scratch_buffer_alignment: u32,
393}
394
395bitflags::bitflags!(
396 /// Workaround flags.
397 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
398 pub struct Workarounds: u32 {
399 /// Only generate SPIR-V for one entry point at a time.
400 const SEPARATE_ENTRY_POINTS = 0x1;
401 /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
402 /// to a subpass resolve attachment array. This nulls out that pointer in that case.
403 const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
404 /// If the following code returns false, then nvidia will end up filling the wrong range.
405 ///
406 /// ```skip
407 /// fn nvidia_succeeds() -> bool {
408 /// # let (copy_length, start_offset) = (0, 0);
409 /// if copy_length >= 4096 {
410 /// if start_offset % 16 != 0 {
411 /// if copy_length == 4096 {
412 /// return true;
413 /// }
414 /// if copy_length % 16 == 0 {
415 /// return false;
416 /// }
417 /// }
418 /// }
419 /// true
420 /// }
421 /// ```
422 ///
423 /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
424 /// if they cover a range of 4096 bytes or more.
425 const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
426 }
427);
428
429#[derive(Clone, Debug, Eq, Hash, PartialEq)]
430struct AttachmentKey {
431 format: vk::Format,
432 layout: vk::ImageLayout,
433 ops: crate::AttachmentOps,
434}
435
436impl AttachmentKey {
437 /// Returns an attachment key for a compatible attachment.
438 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
439 Self {
440 format,
441 layout,
442 ops: crate::AttachmentOps::all(),
443 }
444 }
445}
446
447#[derive(Clone, Eq, Hash, PartialEq)]
448struct ColorAttachmentKey {
449 base: AttachmentKey,
450 resolve: Option<AttachmentKey>,
451}
452
453#[derive(Clone, Eq, Hash, PartialEq)]
454struct DepthStencilAttachmentKey {
455 base: AttachmentKey,
456 stencil_ops: crate::AttachmentOps,
457}
458
459#[derive(Clone, Eq, Default, Hash, PartialEq)]
460struct RenderPassKey {
461 colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
462 depth_stencil: Option<DepthStencilAttachmentKey>,
463 sample_count: u32,
464 multiview_mask: Option<NonZeroU32>,
465}
466
467struct DeviceShared {
468 raw: ash::Device,
469 family_index: u32,
470 queue_index: u32,
471 raw_queue: vk::Queue,
472 instance: Arc<InstanceShared>,
473 physical_device: vk::PhysicalDevice,
474 enabled_extensions: Vec<&'static CStr>,
475 extension_fns: DeviceExtensionFunctions,
476 vendor_id: u32,
477 pipeline_cache_validation_key: [u8; 16],
478 timestamp_period: f32,
479 private_caps: PrivateCapabilities,
480 workarounds: Workarounds,
481 features: wgt::Features,
482 render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
483 sampler_cache: Mutex<sampler::SamplerCache>,
484 memory_allocations_counter: InternalCounter,
485
486 /// Because we have cached framebuffers which are not deleted from until
487 /// the device is destroyed, if the implementation of vulkan re-uses handles
488 /// we need some way to differentiate between the old handle and the new handle.
489 /// This factory allows us to have a dedicated identity value for each texture.
490 texture_identity_factory: ResourceIdentityFactory<vk::Image>,
491 /// As above, for texture views.
492 texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
493
494 empty_descriptor_set_layout: vk::DescriptorSetLayout,
495
496 // The `drop_guard` field must be the last field of this struct so it is dropped last.
497 // Do not add new fields after it.
498 drop_guard: Option<crate::DropGuard>,
499}
500
501impl Drop for DeviceShared {
502 fn drop(&mut self) {
503 for &raw in self.render_passes.lock().values() {
504 unsafe { self.raw.destroy_render_pass(raw, None) };
505 }
506 unsafe {
507 self.raw
508 .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
509 };
510 if self.drop_guard.is_none() {
511 unsafe { self.raw.destroy_device(None) };
512 }
513 }
514}
515
516pub struct Device {
517 mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
518 desc_allocator: Mutex<descriptor::DescriptorAllocator>,
519 valid_ash_memory_types: u32,
520 naga_options: naga::back::spv::Options<'static>,
521 #[cfg(feature = "renderdoc")]
522 render_doc: crate::auxil::renderdoc::RenderDoc,
523 counters: Arc<wgt::HalCounters>,
524 // Struct members are dropped from first to last, put the Device last to ensure that
525 // all resources that depends on it are destroyed before it like the mem_allocator
526 shared: Arc<DeviceShared>,
527}
528
529impl Drop for Device {
530 fn drop(&mut self) {}
531}
532
533/// Semaphores for forcing queue submissions to run in order.
534///
535/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
536/// ordered, then the first submission will finish on the GPU before the second
537/// submission begins. To get this behavior on Vulkan we need to pass semaphores
538/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
539/// and to signal when their execution is done.
540///
541/// Normally this can be done with a single semaphore, waited on and then
542/// signalled for each submission. At any given time there's exactly one
543/// submission that would signal the semaphore, and exactly one waiting on it,
544/// as Vulkan requires.
545///
546/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
547/// hang if we use a single semaphore. The workaround is to alternate between
548/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
549/// the workaround until, say, Oct 2026.
550///
551/// [`wgpu_hal::Queue`]: crate::Queue
552/// [`submit`]: crate::Queue::submit
553/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
554/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
555#[derive(Clone)]
556struct RelaySemaphores {
557 /// The semaphore the next submission should wait on before beginning
558 /// execution on the GPU. This is `None` for the first submission, which
559 /// should not wait on anything at all.
560 wait: Option<vk::Semaphore>,
561
562 /// The semaphore the next submission should signal when it has finished
563 /// execution on the GPU.
564 signal: vk::Semaphore,
565}
566
567impl RelaySemaphores {
568 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
569 Ok(Self {
570 wait: None,
571 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
572 })
573 }
574
575 /// Advances the semaphores, returning the semaphores that should be used for a submission.
576 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
577 let old = self.clone();
578
579 // Build the state for the next submission.
580 match self.wait {
581 None => {
582 // The `old` values describe the first submission to this queue.
583 // The second submission should wait on `old.signal`, and then
584 // signal a new semaphore which we'll create now.
585 self.wait = Some(old.signal);
586 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
587 }
588 Some(ref mut wait) => {
589 // What this submission signals, the next should wait.
590 mem::swap(wait, &mut self.signal);
591 }
592 };
593
594 Ok(old)
595 }
596
597 /// Destroys the semaphores.
598 unsafe fn destroy(&self, device: &ash::Device) {
599 unsafe {
600 if let Some(wait) = self.wait {
601 device.destroy_semaphore(wait, None);
602 }
603 device.destroy_semaphore(self.signal, None);
604 }
605 }
606}
607
608pub struct Queue {
609 raw: vk::Queue,
610 device: Arc<DeviceShared>,
611 family_index: u32,
612 relay_semaphores: Mutex<RelaySemaphores>,
613 signal_semaphores: Mutex<SemaphoreList>,
614 wait_semaphores: Mutex<SemaphoreList>,
615}
616
617impl Queue {
618 pub fn as_raw(&self) -> vk::Queue {
619 self.raw
620 }
621}
622
623impl Drop for Queue {
624 fn drop(&mut self) {
625 unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
626 }
627}
628#[derive(Debug)]
629enum BufferMemoryBacking {
630 Managed(gpu_allocator::vulkan::Allocation),
631 VulkanMemory {
632 memory: vk::DeviceMemory,
633 offset: u64,
634 size: u64,
635 },
636}
637impl BufferMemoryBacking {
638 fn memory(&self) -> vk::DeviceMemory {
639 match self {
640 Self::Managed(m) => unsafe { m.memory() },
641 Self::VulkanMemory { memory, .. } => *memory,
642 }
643 }
644 fn offset(&self) -> u64 {
645 match self {
646 Self::Managed(m) => m.offset(),
647 Self::VulkanMemory { offset, .. } => *offset,
648 }
649 }
650 fn size(&self) -> u64 {
651 match self {
652 Self::Managed(m) => m.size(),
653 Self::VulkanMemory { size, .. } => *size,
654 }
655 }
656}
657/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
658/// and therefore what cleanup is required when the buffer is destroyed.
659#[derive(Debug)]
660enum BufferOwnership {
661 /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
662 /// handle is destroyed and the memory is released.
663 Managed(Mutex<BufferMemoryBacking>),
664 /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
665 /// by the caller. On cleanup only the buffer handle is destroyed.
666 RawHandle,
667 /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
668 /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
669 /// neither the handle nor the memory.
670 External(crate::DropGuard),
671}
672
673#[derive(Debug)]
674pub struct Buffer {
675 raw: vk::Buffer,
676
677 // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
678 ownership: BufferOwnership,
679}
680impl Buffer {
681 /// # Safety
682 ///
683 /// - `vk_buffer`'s memory must be managed by the caller
684 /// - Externally imported buffers can't be mapped by `wgpu`
685 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
686 Self {
687 raw: vk_buffer,
688 ownership: BufferOwnership::RawHandle,
689 }
690 }
691
692 /// # Safety
693 /// - `vk_buffer` must outlive the returned `Buffer`.
694 /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
695 /// The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
696 /// - Externally imported buffers can't be mapped by `wgpu`.
697 pub unsafe fn from_raw_externally_owned(
698 vk_buffer: vk::Buffer,
699 drop_callback: crate::DropCallback,
700 ) -> Self {
701 Self {
702 raw: vk_buffer,
703 ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
704 }
705 }
706
707 /// # Safety
708 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
709 /// - Externally imported buffers can't be mapped by `wgpu`
710 /// - `offset` and `size` must be valid with the allocation of `memory`
711 pub unsafe fn from_raw_managed(
712 vk_buffer: vk::Buffer,
713 memory: vk::DeviceMemory,
714 offset: u64,
715 size: u64,
716 ) -> Self {
717 Self {
718 raw: vk_buffer,
719 ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
720 memory,
721 offset,
722 size,
723 })),
724 }
725 }
726
727 /// # Safety
728 /// - The buffer handle must not be manually destroyed
729 pub unsafe fn raw_handle(&self) -> vk::Buffer {
730 self.raw
731 }
732}
733
734impl crate::DynBuffer for Buffer {}
735
736#[derive(Debug)]
737pub struct AccelerationStructure {
738 raw: vk::AccelerationStructureKHR,
739 buffer: vk::Buffer,
740 allocation: gpu_allocator::vulkan::Allocation,
741 compacted_size_query: Option<vk::QueryPool>,
742}
743
744impl crate::DynAccelerationStructure for AccelerationStructure {}
745
746#[derive(Debug)]
747pub enum TextureMemory {
748 // shared memory in GPU allocator (owned by wgpu-hal)
749 Allocation(gpu_allocator::vulkan::Allocation),
750
751 // dedicated memory (owned by wgpu-hal)
752 Dedicated(vk::DeviceMemory),
753
754 // memory not owned by wgpu
755 External,
756}
757
758#[derive(Debug)]
759pub struct Texture {
760 raw: vk::Image,
761 memory: TextureMemory,
762 format: wgt::TextureFormat,
763 copy_size: crate::CopyExtent,
764 identity: ResourceIdentity<vk::Image>,
765
766 // The `drop_guard` field must be the last field of this struct so it is dropped last.
767 // Do not add new fields after it.
768 drop_guard: Option<crate::DropGuard>,
769}
770
771impl crate::DynTexture for Texture {}
772
773impl Texture {
774 /// # Safety
775 ///
776 /// - The image handle must not be manually destroyed
777 pub unsafe fn raw_handle(&self) -> vk::Image {
778 self.raw
779 }
780
781 /// # Safety
782 ///
783 /// - The caller must not free the `vk::DeviceMemory` or
784 /// `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
785 pub unsafe fn memory(&self) -> &TextureMemory {
786 &self.memory
787 }
788}
789
790#[derive(Debug)]
791pub struct TextureView {
792 raw_texture: vk::Image,
793 raw: vk::ImageView,
794 _layers: NonZeroU32,
795 format: wgt::TextureFormat,
796 raw_format: vk::Format,
797 base_mip_level: u32,
798 dimension: wgt::TextureViewDimension,
799 texture_identity: ResourceIdentity<vk::Image>,
800 view_identity: ResourceIdentity<vk::ImageView>,
801}
802
803impl crate::DynTextureView for TextureView {}
804
805impl TextureView {
806 /// # Safety
807 ///
808 /// - The image view handle must not be manually destroyed
809 pub unsafe fn raw_handle(&self) -> vk::ImageView {
810 self.raw
811 }
812
813 /// Returns the raw texture view, along with its identity.
814 fn identified_raw_view(&self) -> IdentifiedTextureView {
815 IdentifiedTextureView {
816 raw: self.raw,
817 identity: self.view_identity,
818 }
819 }
820}
821
822#[derive(Debug)]
823pub struct Sampler {
824 raw: vk::Sampler,
825 create_info: vk::SamplerCreateInfo<'static>,
826}
827
828impl crate::DynSampler for Sampler {}
829
/// Describes one binding inside a particular BindGroupLayout / BindGroup.
///
/// A [`naga::back::spv::BindingInfo`] is built from this; the descriptor set
/// value comes from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The binding index.
    binding: u32,
    /// The binding array size, if any.
    binding_array_size: Option<NonZeroU32>,
}
838
839#[derive(Debug)]
840pub struct BindGroupLayout {
841 raw: vk::DescriptorSetLayout,
842 desc_count: descriptor::DescriptorCounts,
843 /// Sorted list of entries.
844 entries: Box<[wgt::BindGroupLayoutEntry]>,
845 /// Map of original binding index to remapped binding index and optional
846 /// array size.
847 binding_map: Vec<(u32, BindingInfo)>,
848 contains_binding_arrays: bool,
849}
850
851impl crate::DynBindGroupLayout for BindGroupLayout {}
852
853#[derive(Debug)]
854pub struct PipelineLayout {
855 raw: vk::PipelineLayout,
856 binding_map: naga::back::spv::BindingMap,
857}
858
859impl crate::DynPipelineLayout for PipelineLayout {}
860
861#[derive(Debug)]
862pub struct BindGroup {
863 set: descriptor::DescriptorSet,
864}
865
866impl crate::DynBindGroup for BindGroup {}
867
868/// Miscellaneous allocation recycling pool for `CommandAllocator`.
869#[derive(Default)]
870struct Temp {
871 marker: Vec<u8>,
872 buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
873 image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
874}
875
876impl Temp {
877 fn clear(&mut self) {
878 self.marker.clear();
879 self.buffer_barriers.clear();
880 self.image_barriers.clear();
881 }
882
883 fn make_c_str(&mut self, name: &str) -> &CStr {
884 self.marker.clear();
885 self.marker.extend_from_slice(name.as_bytes());
886 self.marker.push(0);
887 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
888 }
889}
890
/// Produces unique IDs for resources of type `T`.
///
/// Vulkan handles are not permanently unique, so this offers a way to give
/// each resource an ID of its own.
struct ResourceIdentityFactory<T> {
    // Lock-protected counter for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    // Atomic counter for targets with 64-bit atomics.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first ID is 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Hands out the next ID for a resource of type `T`; each call advances
    /// the counter by one.
    fn next(&self) -> ResourceIdentity<T> {
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// Serves as a hashable key for resources. Identities handed out by the same
/// [`ResourceIdentityFactory`] never repeat.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
946
947#[derive(Clone, Eq, Hash, PartialEq)]
948struct FramebufferKey {
949 raw_pass: vk::RenderPass,
950 /// Because this is used as a key in a hash map, we need to include the identity
951 /// so that this hashes differently, even if the ImageView handles are the same
952 /// between different views.
953 attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
954 /// While this is redundant for calculating the hash, we need access to an array
955 /// of all the raw ImageViews when we are creating the actual framebuffer,
956 /// so we store this here.
957 attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
958 extent: wgt::Extent3d,
959}
960
961impl FramebufferKey {
962 fn push_view(&mut self, view: IdentifiedTextureView) {
963 self.attachment_identities.push(view.identity);
964 self.attachment_views.push(view.raw);
965 }
966}
967
/// A texture view paired with its identity.
///
/// `FramebufferKey::push_view` stores both halves: the identity for hashing
/// and the raw handle for framebuffer creation.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Identity of the view, independent of the raw handle value.
    identity: ResourceIdentity<vk::ImageView>,
}
974
/// Key type of the `CommandEncoder::temp_texture_views` map.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// Raw handle of the image the view refers to.
    texture: vk::Image,
    /// As this is used in a hash map, we need to include the identity so that
    /// this hashes differently even if the `Image` handles are the same
    /// between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    // NOTE(review): the remaining fields presumably describe which format,
    // mip level and depth slice of the texture the temporary view selects.
    // Confirm against the code that builds these keys.
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
986
/// The Vulkan backend's command encoder, built around a `vk::CommandPool`.
pub struct CommandEncoder {
    /// The command pool held by this encoder. It is destroyed when the
    /// encoder is dropped.
    raw: vk::CommandPool,
    /// Shared device state; used on drop to destroy the Vulkan objects held
    /// by this encoder.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers keyed by [`FramebufferKey`]. Every entry is destroyed when
    /// the encoder is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Image views keyed by [`TempTextureViewKey`]. Every entry is destroyed
    /// when the encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared HAL counters; `command_encoders` is decremented when the encoder
    /// is dropped.
    counters: Arc<wgt::HalCounters>,

    // NOTE(review): presumably mirrors `RenderPipeline::is_multiview` for the
    // most recently bound render pipeline. Confirm in the command-recording code.
    current_pipeline_is_multiview: bool,
}
1030
1031impl Drop for CommandEncoder {
1032 fn drop(&mut self) {
1033 // SAFETY:
1034 //
1035 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1036 // `CommandBuffer` must live until its execution is complete, and that a
1037 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1038 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1039 // state.
1040 //
1041 // The other VUIDs are pretty obvious.
1042 unsafe {
1043 // `vkDestroyCommandPool` also frees any command buffers allocated
1044 // from that pool, so there's no need to explicitly call
1045 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1046 // fields.
1047 self.device.raw.destroy_command_pool(self.raw, None);
1048 }
1049
1050 for (_, fb) in self.framebuffers.drain() {
1051 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1052 }
1053
1054 for (_, view) in self.temp_texture_views.drain() {
1055 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1056 }
1057
1058 self.counters.command_encoders.sub(1);
1059 }
1060}
1061
impl CommandEncoder {
    /// Returns the raw Vulkan handle stored in `active`, i.e. the command
    /// buffer currently being recorded.
    ///
    /// NOTE(review): presumably a null handle when the encoder is not in the
    /// "recording" state; confirm against the code that sets `active`.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1070
1071impl fmt::Debug for CommandEncoder {
1072 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073 f.debug_struct("CommandEncoder")
1074 .field("raw", &self.raw)
1075 .finish()
1076 }
1077}
1078
/// Wrapper around a raw `vk::CommandBuffer`.
///
/// `Queue::submit` passes the `raw` handle of each command buffer to
/// `vkQueueSubmit`.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw Vulkan command buffer handle.
    raw: vk::CommandBuffer,
}

// Empty impl of the `crate::DynCommandBuffer` trait for this type.
impl crate::DynCommandBuffer for CommandBuffer {}
1085
/// A shader module, held either as a raw Vulkan object or as a naga shader.
#[derive(Debug)]
pub enum ShaderModule {
    /// An existing raw `vk::ShaderModule`.
    Raw(vk::ShaderModule),
    /// A naga shader together with the runtime checks requested for it.
    ///
    /// NOTE(review): presumably translated to a Vulkan module later, at
    /// pipeline creation; confirm in the device code.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

// Empty impl of the `crate::DynShaderModule` trait for this type.
impl crate::DynShaderModule for ShaderModule {}
1096
/// Wrapper around the raw `vk::Pipeline` of a render pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
    // NOTE(review): presumably set when the pipeline was created for
    // multiview rendering; confirm at the pipeline-creation site.
    is_multiview: bool,
}

// Empty impl of the `crate::DynRenderPipeline` trait for this type.
impl crate::DynRenderPipeline for RenderPipeline {}
1104
/// Wrapper around the raw `vk::Pipeline` of a compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Empty impl of the `crate::DynComputePipeline` trait for this type.
impl crate::DynComputePipeline for ComputePipeline {}
1111
/// Wrapper around a raw `vk::PipelineCache`.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

// Empty impl of the `crate::DynPipelineCache` trait for this type.
impl crate::DynPipelineCache for PipelineCache {}
1118
/// A query set, backed by a raw `vk::QueryPool`.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

// Empty impl of the `crate::DynQuerySet` trait for this type.
impl crate::DynQuerySet for QuerySet {}
1125
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// Hold a read lock on the fence pool for as long as any reference to one
    /// of the fences inside it is in use. This ensures there are no race
    /// conditions when resetting the fences.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool(RwLock<FencePool>),
}
1173
/// A shared fence type. The `Arc` is expected to have a ref-count of one once
/// the function using it has returned.
///
/// Access to a fence must be synchronised, because fence resetting might
/// happen at any point. Resetting checks the ref-count of the fence, so code
/// that uses a fence should clone its `Arc` container instead of copying the
/// raw handle: the extra reference signals that the fence is in use and must
/// not be reset.
pub(super) type SynchronizedFence = Arc<vk::Fence>;
1180
/// State behind [`Fence::FencePool`]: the fences in flight and the fences
/// available for reuse.
#[derive(Debug)]
pub struct FencePool {
    /// Highest fence value recorded as completed by the last call to
    /// `Fence::maintain`.
    last_completed: crate::FenceValue,
    /// The pending fence values have to be ascending.
    active: Vec<(crate::FenceValue, SynchronizedFence)>,
    /// Fences available for reuse; `Queue::submit` pops from this list before
    /// creating a new fence.
    // Don't need extra synchronisation around the fences here, if they are used they should be put into active.
    free: Vec<vk::Fence>,
}
1189
// Empty impl of the `crate::DynFence` trait for this type.
impl crate::DynFence for Fence {}
1191
1192impl Fence {
1193 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1194 ///
1195 /// As an optimization, assume that we already know that the fence has
1196 /// reached `last_completed`, and don't bother checking fences whose values
1197 /// are less than that: those fences remain in the `active` array only
1198 /// because we haven't called `maintain` yet to clean them up.
1199 ///
1200 /// [`FenceValue`]: crate::FenceValue
1201 fn check_active(
1202 device: &ash::Device,
1203 mut last_completed: crate::FenceValue,
1204 active: &[(crate::FenceValue, SynchronizedFence)],
1205 ) -> Result<crate::FenceValue, crate::DeviceError> {
1206 for &(value, ref raw) in active.iter() {
1207 unsafe {
1208 if value > last_completed
1209 && device
1210 // Don't need to clone as active should be from a read or
1211 // write lock which means this is already synchronised.
1212 .get_fence_status(**raw)
1213 .map_err(map_host_device_oom_and_lost_err)?
1214 {
1215 last_completed = value;
1216 }
1217 }
1218 }
1219 Ok(last_completed)
1220 }
1221
1222 /// Return the highest signalled [`FenceValue`] for `self`.
1223 ///
1224 /// [`FenceValue`]: crate::FenceValue
1225 fn get_latest(
1226 &self,
1227 device: &ash::Device,
1228 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1229 ) -> Result<crate::FenceValue, crate::DeviceError> {
1230 match *self {
1231 Self::TimelineSemaphore(raw) => unsafe {
1232 Ok(match *extension.unwrap() {
1233 ExtensionFn::Extension(ref ext) => ext
1234 .get_semaphore_counter_value(raw)
1235 .map_err(map_host_device_oom_and_lost_err)?,
1236 ExtensionFn::Promoted => device
1237 .get_semaphore_counter_value(raw)
1238 .map_err(map_host_device_oom_and_lost_err)?,
1239 })
1240 },
1241 Self::FencePool(ref pool) => {
1242 let FencePool {
1243 last_completed,
1244 ref active,
1245 free: _,
1246 } = *pool.read();
1247 Self::check_active(device, last_completed, active)
1248 }
1249 }
1250 }
1251
1252 /// Trim the internal state of this [`Fence`].
1253 ///
1254 /// This function has no externally visible effect, but you should call it
1255 /// periodically to keep this fence's resource consumption under control.
1256 ///
1257 /// For fences using the [`FencePool`] implementation, this function
1258 /// recycles fences that have been signaled. If you don't call this,
1259 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1260 /// time it's called.
1261 ///
1262 /// [`FencePool`]: Fence::FencePool
1263 /// [`Queue::submit`]: crate::Queue::submit
1264 fn maintain(&self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1265 match *self {
1266 Self::TimelineSemaphore(_) => {}
1267 Self::FencePool(ref pool) => {
1268 let FencePool {
1269 ref mut last_completed,
1270 ref mut active,
1271 ref mut free,
1272 } = *pool.write();
1273
1274 let base_free = free.len();
1275 let latest = Self::check_active(device, *last_completed, active)?;
1276
1277 active.retain_mut(|&mut (value, ref mut fence)| {
1278 if value > latest {
1279 true
1280 } else if let Some(fence) = Arc::get_mut(fence) {
1281 // No other references to these, so we have exclusive access. Add them to free and reset them later,
1282 // but drop them from active immediately
1283 free.push(*fence);
1284 false
1285 } else {
1286 // some other function is using it. Although this shouldn't be to long,
1287 // maintain shouldn't block, and it should be cleared up by the next time it happens
1288 true
1289 }
1290 });
1291
1292 if free.len() != base_free {
1293 unsafe { device.reset_fences(&free[base_free..]) }
1294 .map_err(map_device_oom_err)?
1295 }
1296 *last_completed = latest;
1297 }
1298 }
1299 Ok(())
1300 }
1301}
1302
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` with a single `vkQueueSubmit` call and
    /// arranges for `signal_fence` to reach `signal_value` when it completes.
    ///
    /// The submission waits on, and signals, the following semaphores:
    /// - per surface texture: its acquire semaphore (if any) is waited on and
    ///   a submit-signal semaphore is signalled;
    /// - the semaphores staged in `self.wait_semaphores` and
    ///   `self.signal_semaphores`;
    /// - the relay semaphore pair that orders this submission after the
    ///   previous one;
    /// - for a timeline-semaphore fence, the fence's semaphore itself.
    ///
    /// For a pool-based fence, a `VkFence` is taken from the pool (or created)
    /// and passed to `vkQueueSubmit` instead.
    ///
    /// # Errors
    ///
    /// Returns a [`crate::DeviceError`] if creating a semaphore or fence,
    /// maintaining the fence, or the submission itself fails.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless `signal_fence` is pool-based (see below).
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Acquire every semaphore guard up front, before touching any of them.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Add the semaphores staged through `Queue::add_signal_semaphore`.
        // The lock guard is a local, so it is held until this function returns.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // Likewise for the semaphores staged through `Queue::add_wait_semaphore`.
        let mut wait_guard = self.wait_semaphores.lock();
        if !wait_guard.is_empty() {
            wait_semaphores.append(&mut wait_guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        // Keeping the Arc around is probably unneeded - the fence should never be signaled as it was reset,
        // and newer submits should not happen until this submit is done. Therefore, it should be too high
        // to be reset.
        let shared_fence;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool(ref pool) => {
                let FencePool {
                    ref mut active,
                    ref mut free,
                    ..
                } = *pool.write();
                // Reuse a recycled fence when one is available, otherwise create a new one.
                shared_fence = match free.pop() {
                    Some(raw) => Arc::new(raw),
                    None => unsafe {
                        let fence = self
                            .device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?;
                        Arc::new(fence)
                    },
                };
                fence_raw = *shared_fence;
                // `shared_fence` keeps a second reference alive until this
                // function returns, so `Fence::maintain` (which only recycles
                // fences it can access exclusively) leaves it alone meanwhile.
                active.push((signal_value, shared_fence.clone()));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        // Storage handed to `add_to_submit`, declared here so it outlives `vk_info`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` through the swapchain of `surface`.
    ///
    /// Takes the write lock on `surface.swapchain` for the duration of the call.
    ///
    /// # Panics
    ///
    /// Panics if `surface.swapchain` is `None`.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    /// Returns the `timestamp_period` value stored on the shared device.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    /// Blocks until the queue is idle (`vkQueueWaitIdle`).
    ///
    /// Errors are mapped through `map_host_device_oom_and_lost_err`.
    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}
1451
1452impl Queue {
1453 pub fn raw_device(&self) -> &ash::Device {
1454 &self.device.raw
1455 }
1456
1457 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1458 let mut guard = self.signal_semaphores.lock();
1459 if let Some(value) = semaphore_value {
1460 guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1461 } else {
1462 guard.push_signal(SemaphoreType::Binary(semaphore));
1463 }
1464 }
1465
1466 /// Remove `semaphore` from the pending signal list if it is still present.
1467 ///
1468 /// Returns `true` if the semaphore was found and removed. If the submit
1469 /// already consumed it, this is a harmless no-op that returns `false`.
1470 pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1471 self.signal_semaphores.lock().remove(semaphore)
1472 }
1473
1474 /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
1475 ///
1476 /// `semaphore_value` selects the kind of payload the wait targets:
1477 ///
1478 /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
1479 /// - `None` - wait on a binary semaphore signal.
1480 ///
1481 /// `stage` is the pipeline stage at which the wait blocks downstream
1482 /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
1483 /// entire submission, or a more specific stage when only that stage
1484 /// reads the synchronised resource).
1485 pub fn add_wait_semaphore(
1486 &self,
1487 semaphore: vk::Semaphore,
1488 semaphore_value: Option<u64>,
1489 stage: vk::PipelineStageFlags,
1490 ) {
1491 let mut guard = self.wait_semaphores.lock();
1492 if let Some(value) = semaphore_value {
1493 guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
1494 } else {
1495 guard.push_wait(SemaphoreType::Binary(semaphore), stage);
1496 }
1497 }
1498
1499 /// Remove `semaphore` from the pending wait list if it is still present.
1500 ///
1501 /// Returns `true` if the semaphore was found and removed. If the submit
1502 /// already consumed it, this is a no-op that returns `false`.
1503 pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1504 self.wait_semaphores.lock().remove(semaphore)
1505 }
1506}
1507
1508/// Maps
1509///
1510/// - VK_ERROR_OUT_OF_HOST_MEMORY
1511/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1512fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1513 match err {
1514 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1515 get_oom_err(err)
1516 }
1517 e => get_unexpected_err(e),
1518 }
1519}
1520
1521/// Maps
1522///
1523/// - VK_ERROR_OUT_OF_HOST_MEMORY
1524/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1525/// - VK_ERROR_DEVICE_LOST
1526fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1527 match err {
1528 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1529 other => map_host_device_oom_err(other),
1530 }
1531}
1532
1533/// Maps
1534///
1535/// - VK_ERROR_OUT_OF_HOST_MEMORY
1536/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1537/// - VK_ERROR_FRAGMENTATION
1538fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
1539 match err {
1540 vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
1541 other => map_host_device_oom_err(other),
1542 }
1543}
1544
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body);
/// the function simply forwards to `map_host_device_oom_err`.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1555
1556/// Maps
1557///
1558/// - VK_ERROR_OUT_OF_HOST_MEMORY
1559fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1560 match err {
1561 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1562 e => get_unexpected_err(e),
1563 }
1564}
1565
1566/// Maps
1567///
1568/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1569fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1570 match err {
1571 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1572 e => get_unexpected_err(e),
1573 }
1574}
1575
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body);
/// the function simply forwards to `map_host_oom_err`.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1585
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// The last two codes get no dedicated handling (see the comments in the
/// body); the function simply forwards to `map_host_device_oom_err`.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1599
1600/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1601/// feature flag is enabled.
1602fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1603 #[cfg(feature = "internal_error_panic")]
1604 panic!("Unexpected Vulkan error: {_err:?}");
1605
1606 #[allow(unreachable_code)]
1607 crate::DeviceError::Unexpected
1608}
1609
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The Vulkan result passed in is not inspected.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1614
1615/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1616/// feature flag is enabled.
1617fn get_lost_err() -> crate::DeviceError {
1618 #[cfg(feature = "device_lost_panic")]
1619 panic!("Device lost");
1620
1621 #[allow(unreachable_code)]
1622 crate::DeviceError::Lost
1623}
1624
/// Plain-old-data instance record with C layout (`#[repr(C)]`, `Pod`).
///
/// NOTE(review): the field layout appears to mirror Vulkan's
/// `VkAccelerationStructureInstanceKHR`; confirm against the code that fills
/// it in before relying on that.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Twelve floats. Presumably a 3x4 transform matrix; confirm element order.
    transform: [f32; 12],
    /// Two values packed into one `u32`, as the name indicates; the bit split
    /// is not visible here.
    custom_data_and_mask: u32,
    /// Two values packed into one `u32`, as the name indicates; the bit split
    /// is not visible here.
    shader_binding_table_record_offset_and_flags: u32,
    /// 64-bit reference to the acceleration structure used by this instance.
    acceleration_structure_reference: u64,
}
1633
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions
    /// or to the feature set through this struct, as all changes to those members will be
    /// overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1655
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to a value that the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1665
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions
    /// through this struct, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer than (and hence satisfy) `'pnext`. However, we
    /// don't actually use `'this` directly.
    _phantom: PhantomData<&'this ()>,
}
1685
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to a value that the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;