// wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement using a combination of the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but a cached framebuffer is
removed as soon as any of the image views it contains is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29pub mod conv;
30mod descriptor;
31mod device;
32mod drm;
33mod instance;
34mod sampler;
35mod semaphore_list;
36mod swapchain;
37
38pub use adapter::PhysicalDeviceFeatures;
39
40use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
41use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
42
43use arrayvec::ArrayVec;
44use ash::{ext, khr, vk};
45use bytemuck::{Pod, Zeroable};
46use hashbrown::HashSet;
47use parking_lot::{Mutex, RwLock};
48
49use naga::FastHashMap;
50use wgt::InternalCounter;
51
52use semaphore_list::SemaphoreList;
53
54use crate::vulkan::semaphore_list::{SemaphoreListMode, SemaphoreType};
55
/// Maximum total number of render-pass attachments: one color target plus one
/// resolve target per color attachment, plus a single depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

/// Marker type implementing [`crate::Api`] for the Vulkan backend.
#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
90
// Invoke the crate-level helper macro on every Vulkan backend resource type so
// they can be used through `wgpu-hal`'s type-erased (`dyn`) resource API.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
115
/// State for the `VK_EXT_debug_utils` messenger owned by an instance.
struct DebugUtils {
    /// Loaded instance-level function table for `VK_EXT_debug_utils`.
    extension: ext::debug_utils::Instance,
    /// The registered debug messenger handle.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Parameters used to create the debug messenger.
pub struct DebugUtilsCreateInfo {
    /// Which message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Which message categories the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback; see [`DebugUtilsMessengerUserData`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
159
/// Instance state shared between the [`Instance`] and every [`Adapter`]
/// created from it.
pub struct InstanceShared {
    /// Raw instance handle and function table.
    raw: ash::Instance,
    /// Instance extensions enabled at creation.
    extensions: Vec<&'static CStr>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if `VK_EXT_debug_utils` was enabled.
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    // NOTE(review): presumably set when the NVIDIA Optimus layer is detected — confirm.
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

/// The Vulkan backend's [`crate::Api::Instance`] implementation.
pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// A presentable surface, backed by either a native Vulkan surface or a DXGI
/// surface (see the `swapchain` module's trait objects).
pub struct Surface {
    /// The current swapchain, if the surface has been configured.
    swapchain: RwLock<Option<Box<dyn swapchain::Swapchain>>>,
    inner: Box<dyn swapchain::Surface>,
}
191
192impl Surface {
193 /// Returns the raw Vulkan surface handle.
194 ///
195 /// Returns `None` if the surface is a DXGI surface.
196 pub unsafe fn raw_native_handle(&self) -> Option<vk::SurfaceKHR> {
197 Some(
198 self.inner
199 .as_any()
200 .downcast_ref::<swapchain::NativeSurface>()?
201 .as_raw(),
202 )
203 }
204
205 /// Get the raw Vulkan swapchain associated with this surface.
206 ///
207 /// Returns [`None`] if the surface is not configured or if the swapchain
208 /// is a DXGI swapchain.
209 pub fn raw_native_swapchain(&self) -> Option<vk::SwapchainKHR> {
210 let read = self.swapchain.read();
211 Some(
212 read.as_ref()?
213 .as_any()
214 .downcast_ref::<swapchain::NativeSwapchain>()?
215 .as_raw(),
216 )
217 }
218
219 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
220 /// using [VK_GOOGLE_display_timing].
221 ///
222 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
223 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
224 ///
225 /// This can also be used to add a "not before" timestamp to the presentation.
226 ///
227 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
228 ///
229 /// # Panics
230 ///
231 /// - If the surface hasn't been configured.
232 /// - If the surface has been configured for a DXGI swapchain.
233 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
234 ///
235 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
236 #[track_caller]
237 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
238 let mut swapchain = self.swapchain.write();
239 swapchain
240 .as_mut()
241 .expect("Surface should have been configured")
242 .as_any_mut()
243 .downcast_mut::<swapchain::NativeSwapchain>()
244 .expect("Surface should have a native Vulkan swapchain")
245 .set_next_present_time(present_timing);
246 }
247}
248
/// A texture acquired from a [`Surface`] for presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    // NOTE(review): presumably the image's index within the swapchain — confirm.
    index: u32,
    /// The wrapped swapchain image, exposed as an ordinary [`Texture`].
    texture: Texture,
    /// Per-texture state owned by the concrete swapchain implementation.
    metadata: Box<dyn swapchain::SurfaceTextureMetadata>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used wherever a `&Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

// Same as above, for the type-erased texture trait.
impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
269
/// The Vulkan backend's [`crate::Api::Adapter`] implementation, wrapping a
/// physical device.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

/// Function tables for optional device extensions, loaded at device creation.
/// Each `None` means the corresponding extension was not enabled.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
    #[cfg_attr(not(unix), allow(dead_code))]
    external_memory_fd: Option<khr::external_memory_fd::Device>,
}

/// Function tables needed for ray tracing; both are required together.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
304
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Whether image views can carry their own usage flags
    /// (presumably via `VkImageViewUsageCreateInfo` — confirm at the adapter init site).
    image_view_usage: bool,
    /// True if timeline semaphores are available; see the module docs on
    /// fences for how this changes fence handling.
    timeline_semaphores: bool,
    /// Whether the `D24_UNORM` depth format is usable
    /// (presumably queried from the physical device — confirm).
    texture_d24: bool,
    /// Whether the combined `D24_UNORM_S8_UINT` format is usable.
    texture_d24_s8: bool,
    /// Whether the stencil-only `S8_UINT` format is usable.
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// Alignment mask for mapping non-coherent memory
    /// (presumably derived from `nonCoherentAtomSize` — confirm).
    non_coherent_map_mask: wgt::BufferAddress,
    // NOTE(review): presumably mirrors the `multiDrawIndirect` device feature — confirm.
    multi_draw_indirect: bool,
    // NOTE(review): presumably `maxDrawIndirectCount` from the device limits — confirm.
    max_draw_indirect_count: u32,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// Like [`Self::robust_buffer_access`], but for image accesses
    /// (presumably `robustImageAccess` — confirm).
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// Like [`Self::robust_buffer_access2`], but for image accesses
    /// (presumably `robustImageAccess2` — confirm).
    robust_image_access2: bool,
    // NOTE(review): presumably whether the driver zero-initializes workgroup memory — confirm.
    zero_initialize_workgroup_memory: bool,
    // NOTE(review): presumably whether `VK_KHR_image_format_list` is available — confirm.
    image_format_list: bool,
    // NOTE(review): presumably `maxSamplerAllocationCount` from the device limits — confirm.
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,

    /// This is done to panic before undefined behavior, and is imperfect.
    /// Basically, to allow implementations to emulate mv using instancing, if you
    /// want to draw `n` instances to VR, you must draw `2n` instances, but you
    /// can never draw more than `u32::MAX` instances. Therefore, when drawing
    /// multiview on some vulkan implementations, it might restrict the instance
    /// count, which isn't usually a thing in webgpu. We don't expose this limit
    /// because its strange, i.e. only occurs on certain vulkan implementations
    /// if you are drawing more than 128 million instances. We still want to avoid
    /// undefined behavior in this situation, so we panic if the limit is violated.
    multiview_instance_index_limit: u32,

    /// BufferUsages::ACCELERATION_STRUCTURE_SCRATCH allows usage as a scratch buffer.
    /// Vulkan has no way to specify this as a usage, and it maps to other usages, but
    /// these usages do not have as high of an alignment requirement using the buffer as
    /// a scratch buffer when building acceleration structures.
    scratch_buffer_alignment: u32,
}
394
bitflags::bitflags!(
    /// Workaround flags, determined per adapter (see [`Adapter::workarounds`]).
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
428
/// Hashable description of a single render-pass attachment, used as part of
/// [`RenderPassKey`] for the device's render pass cache.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
435
436impl AttachmentKey {
437 /// Returns an attachment key for a compatible attachment.
438 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
439 Self {
440 format,
441 layout,
442 ops: crate::AttachmentOps::all(),
443 }
444 }
445}
446
/// Key data for a color attachment and its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

/// Key data for a depth/stencil attachment; the stencil aspect's ops are
/// tracked separately from the depth ops in `base`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

/// Hash-map key for the device's render pass cache (see module docs:
/// render passes are cached on the device and kept forever).
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    // NOTE(review): presumably a bitmask of views for multiview rendering — confirm.
    multiview_mask: Option<NonZeroU32>,
}
466
/// Device state shared between the [`Device`], its [`Queue`], and the
/// resources created from it.
struct DeviceShared {
    /// Raw device handle and function table.
    raw: ash::Device,
    /// Queue family index the device's queue belongs to.
    family_index: u32,
    /// Index of the queue within its family.
    queue_index: u32,
    raw_queue: vk::Queue,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    // NOTE(review): presumably used to validate pipeline cache blobs — confirm.
    pipeline_cache_validation_key: [u8; 16],
    // NOTE(review): presumably `VkPhysicalDeviceLimits::timestampPeriod` — confirm.
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Render pass cache; entries live until the device is dropped
    /// (see module docs and `Drop for DeviceShared`).
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,

    empty_descriptor_set_layout: vk::DescriptorSetLayout,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}
500
impl Drop for DeviceShared {
    fn drop(&mut self) {
        // Destroy every cached render pass before the device itself goes away.
        for &raw in self.render_passes.lock().values() {
            // SAFETY: these render passes were created from `self.raw`, and the
            // shared device state is being dropped, so nothing uses them anymore.
            unsafe { self.raw.destroy_render_pass(raw, None) };
        }
        // SAFETY: same reasoning as above for the device-owned empty layout.
        unsafe {
            self.raw
                .destroy_descriptor_set_layout(self.empty_descriptor_set_layout, None)
        };
        // A present `drop_guard` means the `VkDevice` is owned externally, so
        // only destroy the device when we own it ourselves.
        if self.drop_guard.is_none() {
            unsafe { self.raw.destroy_device(None) };
        }
    }
}
515
/// The Vulkan backend's [`crate::Api::Device`] implementation.
pub struct Device {
    mem_allocator: Mutex<gpu_allocator::vulkan::Allocator>,
    desc_allocator: Mutex<descriptor::DescriptorAllocator>,
    // NOTE(review): presumably a bitmask of memory type indices wgpu may use — confirm.
    valid_ash_memory_types: u32,
    /// Options for Naga's SPIR-V backend when compiling shaders for this device.
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
    // Struct members are dropped from first to last, put the Device last to ensure that
    // all resources that depends on it are destroyed before it like the mem_allocator
    shared: Arc<DeviceShared>,
}

impl Drop for Device {
    // Intentionally empty: cleanup happens in each field's own `Drop`, relying
    // on the field ordering documented on the struct above. The explicit impl
    // also prevents fields from being moved out of `Device`.
    fn drop(&mut self) {}
}
532
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
566
impl RelaySemaphores {
    /// Creates the initial state: no wait semaphore, one fresh signal semaphore.
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                // This alternation between the two semaphores is the
                // Mesa ANV workaround described on the struct.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    ///
    /// # Safety
    ///
    /// `device` must be the device that created these semaphores, and they
    /// must no longer be in use by any pending submission.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}
607
/// The Vulkan backend's [`crate::Api::Queue`] implementation.
pub struct Queue {
    raw: vk::Queue,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// See [`RelaySemaphores`] for why these exist and how they are rotated.
    relay_semaphores: Mutex<RelaySemaphores>,
    // NOTE(review): presumably extra semaphores to signal on the next submit — confirm.
    signal_semaphores: Mutex<SemaphoreList>,
    // NOTE(review): presumably extra semaphores for the next submit to wait on — confirm.
    wait_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}

impl Drop for Queue {
    fn drop(&mut self) {
        // SAFETY: `device` is kept alive by this queue's `Arc`, and the relay
        // semaphores are owned by this queue, which is now being dropped.
        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
    }
}
/// The memory backing a managed [`Buffer`]: either an allocation owned by the
/// gpu-allocator crate, or a raw memory range supplied by the caller
/// (see [`Buffer::from_raw_managed`]).
#[derive(Debug)]
enum BufferMemoryBacking {
    /// Allocation owned and freed by `gpu_allocator`.
    Managed(gpu_allocator::vulkan::Allocation),
    /// A caller-provided slice of raw device memory.
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
637impl BufferMemoryBacking {
638 fn memory(&self) -> vk::DeviceMemory {
639 match self {
640 Self::Managed(m) => unsafe { m.memory() },
641 Self::VulkanMemory { memory, .. } => *memory,
642 }
643 }
644 fn offset(&self) -> u64 {
645 match self {
646 Self::Managed(m) => m.offset(),
647 Self::VulkanMemory { offset, .. } => *offset,
648 }
649 }
650 fn size(&self) -> u64 {
651 match self {
652 Self::Managed(m) => m.size(),
653 Self::VulkanMemory { size, .. } => *size,
654 }
655 }
656}
/// Describes who owns a [`Buffer`]'s `vk::Buffer` handle and its backing memory,
/// and therefore what cleanup is required when the buffer is destroyed.
#[derive(Debug)]
enum BufferOwnership {
    /// wgpu-hal owns the `vk::Buffer` and its backing memory. On cleanup the buffer
    /// handle is destroyed and the memory is released.
    Managed(Mutex<BufferMemoryBacking>),
    /// wgpu-hal owns the `vk::Buffer` handle but the backing memory is kept alive
    /// by the caller. On cleanup only the buffer handle is destroyed.
    RawHandle,
    /// Caller owns the `vk::Buffer` and its backing memory. On cleanup the
    /// [`crate::DropGuard`] runs the caller's cleanup callback and wgpu-hal touches
    /// neither the handle nor the memory.
    External(crate::DropGuard),
}

/// The Vulkan backend's [`crate::Api::Buffer`] implementation.
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,

    // This field must be last, because it may contain a `DropGuard` which needs to be dropped after all other fields.
    ownership: BufferOwnership,
}
680impl Buffer {
681 /// # Safety
682 ///
683 /// - `vk_buffer`'s memory must be managed by the caller
684 /// - Externally imported buffers can't be mapped by `wgpu`
685 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
686 Self {
687 raw: vk_buffer,
688 ownership: BufferOwnership::RawHandle,
689 }
690 }
691
692 /// # Safety
693 /// - `vk_buffer` must outlive the returned `Buffer`.
694 /// - wgpu-hal will NOT call `vkDestroyBuffer`; the caller remains responsible for the buffer handle's destruction.
695 /// The `drop_callback` runs when the `Buffer` drops and may be used to release caller-side bookkeeping.
696 /// - Externally imported buffers can't be mapped by `wgpu`.
697 pub unsafe fn from_raw_externally_owned(
698 vk_buffer: vk::Buffer,
699 drop_callback: crate::DropCallback,
700 ) -> Self {
701 Self {
702 raw: vk_buffer,
703 ownership: BufferOwnership::External(crate::DropGuard::new(drop_callback)),
704 }
705 }
706
707 /// # Safety
708 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
709 /// - Externally imported buffers can't be mapped by `wgpu`
710 /// - `offset` and `size` must be valid with the allocation of `memory`
711 pub unsafe fn from_raw_managed(
712 vk_buffer: vk::Buffer,
713 memory: vk::DeviceMemory,
714 offset: u64,
715 size: u64,
716 ) -> Self {
717 Self {
718 raw: vk_buffer,
719 ownership: BufferOwnership::Managed(Mutex::new(BufferMemoryBacking::VulkanMemory {
720 memory,
721 offset,
722 size,
723 })),
724 }
725 }
726
727 /// # Safety
728 /// - The buffer handle must not be manually destroyed
729 pub unsafe fn raw_handle(&self) -> vk::Buffer {
730 self.raw
731 }
732}
733
impl crate::DynBuffer for Buffer {}

/// The Vulkan backend's acceleration structure, together with the buffer and
/// allocation that back it.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    allocation: gpu_allocator::vulkan::Allocation,
    /// Query pool used to read back the compacted size, when present.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}

/// How a [`Texture`]'s memory is backed and who owns it.
#[derive(Debug)]
pub enum TextureMemory {
    /// Shared memory in the GPU allocator (owned by wgpu-hal).
    Allocation(gpu_allocator::vulkan::Allocation),

    /// Dedicated memory (owned by wgpu-hal).
    Dedicated(vk::DeviceMemory),

    /// Memory not owned by wgpu.
    External,
}
757
/// The Vulkan backend's [`crate::Api::Texture`] implementation.
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    /// How the image memory is backed; see [`TextureMemory`].
    memory: TextureMemory,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Unique identity for framebuffer caching; see
    /// `DeviceShared::texture_identity_factory`.
    identity: ResourceIdentity<vk::Image>,

    // The `drop_guard` field must be the last field of this struct so it is dropped last.
    // Do not add new fields after it.
    drop_guard: Option<crate::DropGuard>,
}

impl crate::DynTexture for Texture {}
772
impl Texture {
    /// Returns the raw image handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// Returns a description of the image's backing memory.
    ///
    /// # Safety
    ///
    /// - The caller must not free the `vk::DeviceMemory` or
    ///   `gpu_alloc::MemoryBlock` in the returned `TextureMemory`.
    pub unsafe fn memory(&self) -> &TextureMemory {
        &self.memory
    }
}
789
/// The Vulkan backend's [`crate::Api::TextureView`] implementation.
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    _layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the source texture; see `DeviceShared::texture_identity_factory`.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of this view, used in framebuffer cache keys.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}
804
805impl TextureView {
806 /// # Safety
807 ///
808 /// - The image view handle must not be manually destroyed
809 pub unsafe fn raw_handle(&self) -> vk::ImageView {
810 self.raw
811 }
812
813 /// Returns the raw texture view, along with its identity.
814 fn identified_raw_view(&self) -> IdentifiedTextureView {
815 IdentifiedTextureView {
816 raw: self.raw,
817 identity: self.view_identity,
818 }
819 }
820}
821
/// The Vulkan backend's [`crate::Api::Sampler`] implementation.
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// The creation parameters, kept for the device's sampler cache
    /// (see `DeviceShared::sampler_cache`).
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}

/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    binding: u32,
    binding_array_size: Option<NonZeroU32>,
}

/// The Vulkan backend's [`crate::Api::BindGroupLayout`] implementation.
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: descriptor::DescriptorCounts,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}

/// The Vulkan backend's [`crate::Api::PipelineLayout`] implementation.
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Binding remapping handed to Naga's SPIR-V backend.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}

/// The Vulkan backend's [`crate::Api::BindGroup`] implementation.
#[derive(Debug)]
pub struct BindGroup {
    set: descriptor::DescriptorSet,
}

impl crate::DynBindGroup for BindGroup {}
867
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Reusable scratch buffer for building NUL-terminated marker strings.
    marker: Vec<u8>,
    /// Reusable storage for buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Reusable storage for image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
875
876impl Temp {
877 fn clear(&mut self) {
878 self.marker.clear();
879 self.buffer_barriers.clear();
880 self.image_barriers.clear();
881 }
882
883 fn make_c_str(&mut self, name: &str) -> &CStr {
884 self.marker.clear();
885 self.marker.extend_from_slice(name.as_bytes());
886 self.marker.push(0);
887 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
888 }
889}
890
/// Generates unique IDs for each resource of type `T`.
///
/// Vulkan handle values may be reused by the driver, so a handle alone is not
/// a stable identity; this factory hands out IDs that never repeat for the
/// lifetime of the program.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is 0.
    fn new() -> Self {
        #[cfg(not(target_has_atomic = "64"))]
        let next_id = Mutex::new(0);
        #[cfg(target_has_atomic = "64")]
        let next_id = core::sync::atomic::AtomicU64::new(0);
        Self {
            next_id,
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        // Without 64-bit atomics, fall back to a mutex-guarded counter.
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut guard = self.next_id.lock();
            let current = *guard;
            *guard = current + 1;
            current
        };

        // Relaxed is enough: we only need uniqueness, not ordering.
        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
946
/// Hash-map key for the device's framebuffer cache.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}
960
961impl FramebufferKey {
962 fn push_view(&mut self, view: IdentifiedTextureView) {
963 self.attachment_identities.push(view.identity);
964 self.attachment_views.push(view.raw);
965 }
966}
967
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Permanently unique identity for `raw` (handles themselves may be reused).
    identity: ResourceIdentity<vk::ImageView>,
}
974
/// Hash-map key identifying a cached temporary texture view
/// (used for `CommandEncoder::temp_texture_views`).
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
986
/// The Vulkan implementation of [`crate::Api::CommandEncoder`].
///
/// Owns a Vulkan command pool, the command buffers allocated from it, and
/// per-encoder caches (framebuffers and temporary texture views) that are
/// destroyed when the encoder is dropped.
pub struct CommandEncoder {
    /// The command pool that all of this encoder's command buffers are
    /// allocated from.
    raw: vk::CommandPool,
    /// Keeps the device alive; also used for the destroy calls in `Drop`.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers created by this encoder; destroyed in `Drop`.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views created by this encoder; destroyed in `Drop`.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared HAL counters; `command_encoders` is decremented in `Drop`.
    counters: Arc<wgt::HalCounters>,

    /// Whether the most recently bound render pipeline was created with
    /// multiview (see `RenderPipeline::is_multiview`).
    current_pipeline_is_multiview: bool,
}
1030
1031impl Drop for CommandEncoder {
1032 fn drop(&mut self) {
1033 // SAFETY:
1034 //
1035 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1036 // `CommandBuffer` must live until its execution is complete, and that a
1037 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1038 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1039 // state.
1040 //
1041 // The other VUIDs are pretty obvious.
1042 unsafe {
1043 // `vkDestroyCommandPool` also frees any command buffers allocated
1044 // from that pool, so there's no need to explicitly call
1045 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1046 // fields.
1047 self.device.raw.destroy_command_pool(self.raw, None);
1048 }
1049
1050 for (_, fb) in self.framebuffers.drain() {
1051 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1052 }
1053
1054 for (_, view) in self.temp_texture_views.drain() {
1055 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1056 }
1057
1058 self.counters.command_encoders.sub(1);
1059 }
1060}
1061
impl CommandEncoder {
    /// Returns the handle of the currently active command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1070
1071impl fmt::Debug for CommandEncoder {
1072 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073 f.debug_struct("CommandEncoder")
1074 .field("raw", &self.raw)
1075 .finish()
1076 }
1077}
1078
/// The Vulkan implementation of [`crate::Api::CommandBuffer`].
#[derive(Debug)]
pub struct CommandBuffer {
    // Allocated from (and freed with) the building `CommandEncoder`'s pool.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1085
/// A shader module: either a ready-made Vulkan module, or naga IR kept
/// for later translation.
#[derive(Debug)]
pub enum ShaderModule {
    /// A compiled Vulkan shader module.
    Raw(vk::ShaderModule),
    /// Naga IR retained for later translation; the runtime-checks
    /// configuration travels with it so translation can honor it.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1096
/// A Vulkan graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
    /// Whether this pipeline was created for a multiview render pass
    /// (tracked by `CommandEncoder::current_pipeline_is_multiview`).
    is_multiview: bool,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1104
/// A Vulkan compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1111
/// Wrapper around a raw Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1118
/// A Vulkan query pool backing a wgpu query set.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1125
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest fence value known to have completed.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Reset fences, ready for reuse by the next submission.
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1176
1177impl Fence {
1178 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1179 ///
1180 /// As an optimization, assume that we already know that the fence has
1181 /// reached `last_completed`, and don't bother checking fences whose values
1182 /// are less than that: those fences remain in the `active` array only
1183 /// because we haven't called `maintain` yet to clean them up.
1184 ///
1185 /// [`FenceValue`]: crate::FenceValue
1186 fn check_active(
1187 device: &ash::Device,
1188 mut last_completed: crate::FenceValue,
1189 active: &[(crate::FenceValue, vk::Fence)],
1190 ) -> Result<crate::FenceValue, crate::DeviceError> {
1191 for &(value, raw) in active.iter() {
1192 unsafe {
1193 if value > last_completed
1194 && device
1195 .get_fence_status(raw)
1196 .map_err(map_host_device_oom_and_lost_err)?
1197 {
1198 last_completed = value;
1199 }
1200 }
1201 }
1202 Ok(last_completed)
1203 }
1204
1205 /// Return the highest signalled [`FenceValue`] for `self`.
1206 ///
1207 /// [`FenceValue`]: crate::FenceValue
1208 fn get_latest(
1209 &self,
1210 device: &ash::Device,
1211 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1212 ) -> Result<crate::FenceValue, crate::DeviceError> {
1213 match *self {
1214 Self::TimelineSemaphore(raw) => unsafe {
1215 Ok(match *extension.unwrap() {
1216 ExtensionFn::Extension(ref ext) => ext
1217 .get_semaphore_counter_value(raw)
1218 .map_err(map_host_device_oom_and_lost_err)?,
1219 ExtensionFn::Promoted => device
1220 .get_semaphore_counter_value(raw)
1221 .map_err(map_host_device_oom_and_lost_err)?,
1222 })
1223 },
1224 Self::FencePool {
1225 last_completed,
1226 ref active,
1227 free: _,
1228 } => Self::check_active(device, last_completed, active),
1229 }
1230 }
1231
1232 /// Trim the internal state of this [`Fence`].
1233 ///
1234 /// This function has no externally visible effect, but you should call it
1235 /// periodically to keep this fence's resource consumption under control.
1236 ///
1237 /// For fences using the [`FencePool`] implementation, this function
1238 /// recycles fences that have been signaled. If you don't call this,
1239 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1240 /// time it's called.
1241 ///
1242 /// [`FencePool`]: Fence::FencePool
1243 /// [`Queue::submit`]: crate::Queue::submit
1244 fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1245 match *self {
1246 Self::TimelineSemaphore(_) => {}
1247 Self::FencePool {
1248 ref mut last_completed,
1249 ref mut active,
1250 ref mut free,
1251 } => {
1252 let latest = Self::check_active(device, *last_completed, active)?;
1253 let base_free = free.len();
1254 for &(value, raw) in active.iter() {
1255 if value <= latest {
1256 free.push(raw);
1257 }
1258 }
1259 if free.len() != base_free {
1260 active.retain(|&(value, _)| value > latest);
1261 unsafe { device.reset_fences(&free[base_free..]) }
1262 .map_err(map_device_oom_err)?
1263 }
1264 *last_completed = latest;
1265 }
1266 }
1267 Ok(())
1268 }
1269}
1270
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Fence handle passed to `vkQueueSubmit`; stays null when
        // `signal_fence` is a timeline semaphore (which is signalled via the
        // semaphore list instead).
        let mut fence_raw = vk::Fence::null();

        let mut wait_semaphores = SemaphoreList::new(SemaphoreListMode::Wait);
        let mut signal_semaphores = SemaphoreList::new(SemaphoreListMode::Signal);

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Box by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    let ptr: *const () = <*const _>::cast(&*st.metadata);
                    check.insert(ptr as usize);
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| st.metadata.get_semaphore_guard())
            .collect::<Vec<_>>();

        // Each guard is moved into the loop and released at the end of its
        // iteration, once its semaphores have been recorded.
        for mut semaphores in locked_swapchain_semaphores {
            semaphores.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = semaphores.get_acquire_wait_semaphore() {
                wait_semaphores.push_wait(sem, vk::PipelineStageFlags::TOP_OF_PIPE);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_signal(signal_semaphore);
        }

        // Fold in the semaphores staged via `add_signal_semaphore` /
        // `add_wait_semaphore`. Note that both guards are deliberately kept
        // alive until this function returns, i.e. across `queue_submit` below.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        let mut wait_guard = self.wait_semaphores.lock();
        if !wait_guard.is_empty() {
            wait_semaphores.append(&mut wait_guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_semaphores.push_wait(
                SemaphoreType::Binary(sem),
                vk::PipelineStageFlags::TOP_OF_PIPE,
            );
        }

        signal_semaphores.push_signal(SemaphoreType::Binary(semaphore_state.signal));

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_signal(SemaphoreType::Timeline(raw, signal_value));
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence when one is available; otherwise
                // create a fresh one. Either way, record it in `active` under
                // `signal_value` so `check_active` can find it later.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);
        // Storage for the timeline-semaphore submit info; filled in by
        // `add_to_submit` when needed, and must outlive `vk_info`.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = SemaphoreList::add_to_submit(
            &mut wait_semaphores,
            &mut signal_semaphores,
            vk_info,
            &mut vk_timeline_info,
        );

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();

        // The swapchain is expected to exist while presenting a texture
        // acquired from it.
        unsafe { swapchain.as_mut().unwrap().present(self, texture) }
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }

    unsafe fn wait_for_idle(&self) -> Result<(), crate::DeviceError> {
        unsafe { self.device.raw.queue_wait_idle(self.raw) }
            .map_err(map_host_device_oom_and_lost_err)
    }
}
1411
1412impl Queue {
1413 pub fn raw_device(&self) -> &ash::Device {
1414 &self.device.raw
1415 }
1416
1417 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1418 let mut guard = self.signal_semaphores.lock();
1419 if let Some(value) = semaphore_value {
1420 guard.push_signal(SemaphoreType::Timeline(semaphore, value));
1421 } else {
1422 guard.push_signal(SemaphoreType::Binary(semaphore));
1423 }
1424 }
1425
1426 /// Remove `semaphore` from the pending signal list if it is still present.
1427 ///
1428 /// Returns `true` if the semaphore was found and removed. If the submit
1429 /// already consumed it, this is a harmless no-op that returns `false`.
1430 pub fn remove_signal_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1431 self.signal_semaphores.lock().remove(semaphore)
1432 }
1433
1434 /// Stage a semaphore wait on the next [`crate::Queue::submit`] call.
1435 ///
1436 /// `semaphore_value` selects the kind of payload the wait targets:
1437 ///
1438 /// - `Some(value)` - wait until `semaphore` (a timeline semaphore) has been signalled to at least `value`.
1439 /// - `None` - wait on a binary semaphore signal.
1440 ///
1441 /// `stage` is the pipeline stage at which the wait blocks downstream
1442 /// work (e.g. `vk::PipelineStageFlags::TOP_OF_PIPE` to gate the
1443 /// entire submission, or a more specific stage when only that stage
1444 /// reads the synchronised resource).
1445 pub fn add_wait_semaphore(
1446 &self,
1447 semaphore: vk::Semaphore,
1448 semaphore_value: Option<u64>,
1449 stage: vk::PipelineStageFlags,
1450 ) {
1451 let mut guard = self.wait_semaphores.lock();
1452 if let Some(value) = semaphore_value {
1453 guard.push_wait(SemaphoreType::Timeline(semaphore, value), stage);
1454 } else {
1455 guard.push_wait(SemaphoreType::Binary(semaphore), stage);
1456 }
1457 }
1458
1459 /// Remove `semaphore` from the pending wait list if it is still present.
1460 ///
1461 /// Returns `true` if the semaphore was found and removed. If the submit
1462 /// already consumed it, this is a no-op that returns `false`.
1463 pub fn remove_wait_semaphore(&self, semaphore: vk::Semaphore) -> bool {
1464 self.wait_semaphores.lock().remove(semaphore)
1465 }
1466}
1467
1468/// Maps
1469///
1470/// - VK_ERROR_OUT_OF_HOST_MEMORY
1471/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1472fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1473 match err {
1474 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1475 get_oom_err(err)
1476 }
1477 e => get_unexpected_err(e),
1478 }
1479}
1480
1481/// Maps
1482///
1483/// - VK_ERROR_OUT_OF_HOST_MEMORY
1484/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1485/// - VK_ERROR_DEVICE_LOST
1486fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1487 match err {
1488 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1489 other => map_host_device_oom_err(other),
1490 }
1491}
1492
1493/// Maps
1494///
1495/// - VK_ERROR_OUT_OF_HOST_MEMORY
1496/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1497/// - VK_ERROR_FRAGMENTATION
1498fn map_host_device_oom_and_fragmentation_err(err: vk::Result) -> crate::DeviceError {
1499 match err {
1500 vk::Result::ERROR_FRAGMENTATION => get_oom_err(err),
1501 other => map_host_device_oom_err(other),
1502 }
1503}
1504
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot occur;
    // delegate to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1515
1516/// Maps
1517///
1518/// - VK_ERROR_OUT_OF_HOST_MEMORY
1519fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1520 match err {
1521 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1522 e => get_unexpected_err(e),
1523 }
1524}
1525
1526/// Maps
1527///
1528/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1529fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1530 match err {
1531 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1532 e => get_unexpected_err(e),
1533 }
1534}
1535
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address, so
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR cannot occur;
    // delegate to the host-OOM mapping.
    map_host_oom_err(err)
}
1545
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control, so
    // VK_PIPELINE_COMPILE_REQUIRED_EXT cannot occur.
    // We don't use VK_NV_glsl_shader, so VK_ERROR_INVALID_SHADER_NV
    // cannot occur either. Delegate to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1559
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // Reachable only when the panic above is compiled out.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1569
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// Takes the raw error so call sites read uniformly with the other mappers.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1574
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // Reachable only when the panic above is compiled out.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1584
/// Raw TLAS instance record; `Pod`/`Zeroable` so instance data can be
/// written into buffers with `bytemuck`.
///
/// NOTE(review): field layout appears to mirror
/// `VkAccelerationStructureInstanceKHR` — confirm against the Vulkan spec
/// before relying on it.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// Instance transform — presumably a 3x4 matrix; verify order/layout.
    transform: [f32; 12],
    /// Packs the instance custom data and visibility mask.
    custom_data_and_mask: u32,
    /// Packs the SBT record offset and instance flags.
    shader_binding_table_record_offset_and_flags: u32,
    /// Device reference to the bottom-level acceleration structure.
    acceleration_structure_reference: u64,
}
1593
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1615
/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1625
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1645
/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;