wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, but a framebuffer is removed
when any of the image views it refers to is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
52const MILLIS_TO_NANOS: u64 = 1_000_000;
53const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Marker type for the Vulkan backend.
///
/// It carries no data; its [`crate::Api`] implementation names the concrete
/// Vulkan types used for each kind of resource.
#[derive(Clone, Debug)]
pub struct Api;
57
58impl crate::Api for Api {
59 const VARIANT: wgt::Backend = wgt::Backend::Vulkan;
60
61 type Instance = Instance;
62 type Surface = Surface;
63 type Adapter = Adapter;
64 type Device = Device;
65
66 type Queue = Queue;
67 type CommandEncoder = CommandEncoder;
68 type CommandBuffer = CommandBuffer;
69
70 type Buffer = Buffer;
71 type Texture = Texture;
72 type SurfaceTexture = SurfaceTexture;
73 type TextureView = TextureView;
74 type Sampler = Sampler;
75 type QuerySet = QuerySet;
76 type Fence = Fence;
77 type AccelerationStructure = AccelerationStructure;
78 type PipelineCache = PipelineCache;
79
80 type BindGroupLayout = BindGroupLayout;
81 type BindGroup = BindGroup;
82 type PipelineLayout = PipelineLayout;
83 type ShaderModule = ShaderModule;
84 type RenderPipeline = RenderPipeline;
85 type ComputePipeline = ComputePipeline;
86}
87
88crate::impl_dyn_resource!(
89 Adapter,
90 AccelerationStructure,
91 BindGroup,
92 BindGroupLayout,
93 Buffer,
94 CommandBuffer,
95 CommandEncoder,
96 ComputePipeline,
97 Device,
98 Fence,
99 Instance,
100 PipelineCache,
101 PipelineLayout,
102 QuerySet,
103 Queue,
104 RenderPipeline,
105 Sampler,
106 ShaderModule,
107 Surface,
108 SurfaceTexture,
109 Texture,
110 TextureView
111);
112
113struct DebugUtils {
114 extension: ext::debug_utils::Instance,
115 messenger: vk::DebugUtilsMessengerEXT,
116
117 /// Owning pointer to the debug messenger callback user data.
118 ///
119 /// `InstanceShared::drop` destroys the debug messenger before
120 /// dropping this, so the callback should never receive a dangling
121 /// user data pointer.
122 #[allow(dead_code)]
123 callback_data: Box<DebugUtilsMessengerUserData>,
124}
125
126pub struct DebugUtilsCreateInfo {
127 severity: vk::DebugUtilsMessageSeverityFlagsEXT,
128 message_type: vk::DebugUtilsMessageTypeFlagsEXT,
129 callback_data: Box<DebugUtilsMessengerUserData>,
130}
131
132#[derive(Debug)]
133/// The properties related to the validation layer needed for the
134/// DebugUtilsMessenger for their workarounds
135struct ValidationLayerProperties {
136 /// Validation layer description, from `vk::LayerProperties`.
137 layer_description: CString,
138
139 /// Validation layer specification version, from `vk::LayerProperties`.
140 layer_spec_version: u32,
141}
142
143/// User data needed by `instance::debug_utils_messenger_callback`.
144///
145/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
146/// pointer refers to one of these values.
147#[derive(Debug)]
148pub struct DebugUtilsMessengerUserData {
149 /// The properties related to the validation layer, if present
150 validation_layer_properties: Option<ValidationLayerProperties>,
151
152 /// If the OBS layer is present. OBS never increments the version of their layer,
153 /// so there's no reason to have the version.
154 has_obs_layer: bool,
155}
156
157pub struct InstanceShared {
158 raw: ash::Instance,
159 extensions: Vec<&'static CStr>,
160 drop_guard: Option<crate::DropGuard>,
161 flags: wgt::InstanceFlags,
162 memory_budget_thresholds: wgt::MemoryBudgetThresholds,
163 debug_utils: Option<DebugUtils>,
164 get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
165 entry: ash::Entry,
166 has_nv_optimus: bool,
167 android_sdk_version: u32,
168 /// The instance API version.
169 ///
170 /// Which is the version of Vulkan supported for instance-level functionality.
171 ///
172 /// It is associated with a `VkInstance` and its children,
173 /// except for a `VkPhysicalDevice` and its children.
174 instance_api_version: u32,
175}
176
177pub struct Instance {
178 shared: Arc<InstanceShared>,
179}
180
181/// Semaphore used to acquire a swapchain image.
182#[derive(Debug)]
183struct SwapchainAcquireSemaphore {
184 /// A semaphore that is signaled when this image is safe for us to modify.
185 ///
186 /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
187 /// image that we should use, that image may actually still be in use by the
188 /// presentation engine, and is not yet safe to modify. However, that
189 /// function does accept a semaphore that it will signal when the image is
190 /// indeed safe to begin messing with.
191 ///
192 /// This semaphore is:
193 ///
194 /// - waited for by the first queue submission to operate on this image
195 /// since it was acquired, and
196 ///
197 /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
198 /// for us to use.
199 ///
200 /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
201 acquire: vk::Semaphore,
202
203 /// True if the next command submission operating on this image should wait
204 /// for [`acquire`].
205 ///
206 /// We must wait for `acquire` before drawing to this swapchain image, but
207 /// because `wgpu-hal` queue submissions are always strongly ordered, only
208 /// the first submission that works with a swapchain image actually needs to
209 /// wait. We set this flag when this image is acquired, and clear it the
210 /// first time it's passed to [`Queue::submit`] as a surface texture.
211 ///
212 /// Additionally, semaphores can only be waited on once, so we need to ensure
213 /// that we only actually pass this semaphore to the first submission that
214 /// uses that image.
215 ///
216 /// [`acquire`]: SwapchainAcquireSemaphore::acquire
217 /// [`Queue::submit`]: crate::Queue::submit
218 should_wait_for_acquire: bool,
219
220 /// The fence value of the last command submission that wrote to this image.
221 ///
222 /// The next time we try to acquire this image, we'll block until
223 /// this submission finishes, proving that [`acquire`] is ready to
224 /// pass to `vkAcquireNextImageKHR` again.
225 ///
226 /// [`acquire`]: SwapchainAcquireSemaphore::acquire
227 previously_used_submission_index: crate::FenceValue,
228}
229
230impl SwapchainAcquireSemaphore {
231 fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
232 Ok(Self {
233 acquire: device
234 .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
235 should_wait_for_acquire: true,
236 previously_used_submission_index: 0,
237 })
238 }
239
240 /// Sets the fence value which the next acquire will wait for. This prevents
241 /// the semaphore from being used while the previous submission is still in flight.
242 fn set_used_fence_value(&mut self, value: crate::FenceValue) {
243 self.previously_used_submission_index = value;
244 }
245
246 /// Return the semaphore that commands drawing to this image should wait for, if any.
247 ///
248 /// This only returns `Some` once per acquisition; see
249 /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
250 fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
251 if self.should_wait_for_acquire {
252 self.should_wait_for_acquire = false;
253 Some(self.acquire)
254 } else {
255 None
256 }
257 }
258
259 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
260 /// so reset internal state to be ready for the next frame.
261 fn end_semaphore_usage(&mut self) {
262 // Reset the acquire semaphore, so that the next time we acquire this
263 // image, we can wait for it again.
264 self.should_wait_for_acquire = true;
265 }
266
267 unsafe fn destroy(&self, device: &ash::Device) {
268 unsafe {
269 device.destroy_semaphore(self.acquire, None);
270 }
271 }
272}
273
274#[derive(Debug)]
275struct SwapchainPresentSemaphores {
276 /// A pool of semaphores for ordering presentation after drawing.
277 ///
278 /// The first [`present_index`] semaphores in this vector are:
279 ///
280 /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
281 /// image, and
282 ///
283 /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
284 /// this image, when the submission finishes execution.
285 ///
286 /// This vector accumulates one semaphore per submission that writes to this
287 /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
288 /// requires a semaphore to order it with respect to drawing commands, and
289 /// we can't attach new completion semaphores to a command submission after
290 /// it's been submitted. This means that, at submission time, we must create
291 /// the semaphore we might need if the caller's next action is to enqueue a
292 /// presentation of this image.
293 ///
294 /// An alternative strategy would be for presentation to enqueue an empty
295 /// submit, ordered relative to other submits in the usual way, and
296 /// signaling a single presentation semaphore. But we suspect that submits
297 /// are usually expensive enough, and semaphores usually cheap enough, that
298 /// performance-sensitive users will avoid making many submits, so that the
299 /// cost of accumulated semaphores will usually be less than the cost of an
300 /// additional submit.
301 ///
302 /// Only the first [`present_index`] semaphores in the vector are actually
303 /// going to be signalled by submitted commands, and need to be waited for
304 /// by the next present call. Any semaphores beyond that index were created
305 /// for prior presents and are simply being retained for recycling.
306 ///
307 /// [`present_index`]: SwapchainPresentSemaphores::present_index
308 /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
309 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
310 present: Vec<vk::Semaphore>,
311
312 /// The number of semaphores in [`present`] to be signalled for this submission.
313 ///
314 /// [`present`]: SwapchainPresentSemaphores::present
315 present_index: usize,
316
317 /// Which image this semaphore set is used for.
318 frame_index: usize,
319}
320
321impl SwapchainPresentSemaphores {
322 pub fn new(frame_index: usize) -> Self {
323 Self {
324 present: Vec::new(),
325 present_index: 0,
326 frame_index,
327 }
328 }
329
330 /// Return the semaphore that the next submission that writes to this image should
331 /// signal when it's done.
332 ///
333 /// See [`SwapchainPresentSemaphores::present`] for details.
334 fn get_submit_signal_semaphore(
335 &mut self,
336 device: &DeviceShared,
337 ) -> Result<vk::Semaphore, crate::DeviceError> {
338 // Try to recycle a semaphore we created for a previous presentation.
339 let sem = match self.present.get(self.present_index) {
340 Some(sem) => *sem,
341 None => {
342 let sem = device.new_binary_semaphore(&format!(
343 "SwapchainImageSemaphore: Image {} present semaphore {}",
344 self.frame_index, self.present_index
345 ))?;
346 self.present.push(sem);
347 sem
348 }
349 };
350
351 self.present_index += 1;
352
353 Ok(sem)
354 }
355
356 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
357 /// so reset internal state to be ready for the next frame.
358 fn end_semaphore_usage(&mut self) {
359 // Reset the index to 0, so that the next time we get a semaphore, we
360 // start from the beginning of the list.
361 self.present_index = 0;
362 }
363
364 /// Return the semaphores that a presentation of this image should wait on.
365 ///
366 /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
367 /// ends this image's acquisition should wait for. See
368 /// [`SwapchainPresentSemaphores::present`] for details.
369 ///
370 /// Reset `self` to be ready for the next acquisition cycle.
371 ///
372 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
373 fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
374 self.present[0..self.present_index].to_vec()
375 }
376
377 unsafe fn destroy(&self, device: &ash::Device) {
378 unsafe {
379 for sem in &self.present {
380 device.destroy_semaphore(*sem, None);
381 }
382 }
383 }
384}
385
386struct Swapchain {
387 raw: vk::SwapchainKHR,
388 functor: khr::swapchain::Device,
389 device: Arc<DeviceShared>,
390 images: Vec<vk::Image>,
391 config: crate::SurfaceConfiguration,
392
393 /// Semaphores used between image acquisition and the first submission
394 /// that uses that image. This is indexed using [`next_acquire_index`].
395 ///
396 /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
397 /// received the swapchain image index for the frame yet, so we cannot use
398 /// that to index it.
399 ///
400 /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
401 /// the submission indicated by [`previously_used_submission_index`]. This enusres
402 /// the semaphore is no longer in use before we use it.
403 ///
404 /// [`next_acquire_index`]: Swapchain::next_acquire_index
405 /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
406 /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
407 acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
408 /// The index of the next acquire semaphore to use.
409 ///
410 /// This is incremented each time we acquire a new image, and wraps around
411 /// to 0 when it reaches the end of [`acquire_semaphores`].
412 ///
413 /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
414 next_acquire_index: usize,
415
416 /// Semaphore sets used between all submissions that write to an image and
417 /// the presentation of that image.
418 ///
419 /// This is indexed by the swapchain image index returned by
420 /// [`vkAcquireNextImageKHR`].
421 ///
422 /// We know it is safe to use these semaphores because use them
423 /// _after_ the acquire semaphore. Because the acquire semaphore
424 /// has been signaled, the previous presentation using that image
425 /// is known-finished, so this semaphore is no longer in use.
426 ///
427 /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
428 present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,
429
430 /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
431 ///
432 /// # Safety
433 ///
434 /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
435 /// so the VK_GOOGLE_display_timing extension is present.
436 next_present_time: Option<vk::PresentTimeGOOGLE>,
437}
438
439impl Swapchain {
440 /// Mark the current frame finished, advancing to the next acquire semaphore.
441 fn advance_acquire_semaphore(&mut self) {
442 let semaphore_count = self.acquire_semaphores.len();
443 self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
444 }
445
446 /// Get the next acquire semaphore that should be used with this swapchain.
447 fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
448 self.acquire_semaphores[self.next_acquire_index].clone()
449 }
450
451 /// Get the set of present semaphores that should be used with the given image index.
452 fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
453 self.present_semaphores[index as usize].clone()
454 }
455}
456
457pub struct Surface {
458 raw: vk::SurfaceKHR,
459 functor: khr::surface::Instance,
460 instance: Arc<InstanceShared>,
461 swapchain: RwLock<Option<Swapchain>>,
462}
463
464impl Surface {
465 /// Get the raw Vulkan swapchain associated with this surface.
466 ///
467 /// Returns [`None`] if the surface is not configured.
468 pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
469 let read = self.swapchain.read();
470 read.as_ref().map(|it| it.raw)
471 }
472
473 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
474 /// using [VK_GOOGLE_display_timing].
475 ///
476 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
477 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
478 ///
479 /// This can also be used to add a "not before" timestamp to the presentation.
480 ///
481 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
482 ///
483 /// # Panics
484 ///
485 /// - If the surface hasn't been configured.
486 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
487 ///
488 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
489 #[track_caller]
490 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
491 let mut swapchain = self.swapchain.write();
492 let swapchain = swapchain
493 .as_mut()
494 .expect("Surface should have been configured");
495 let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
496 if swapchain.device.features.contains(features) {
497 swapchain.next_present_time = Some(present_timing);
498 } else {
499 // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
500 panic!(
501 concat!(
502 "Tried to set display timing properties ",
503 "without the corresponding feature ({:?}) enabled."
504 ),
505 features
506 );
507 }
508 }
509}
510
511#[derive(Debug)]
512pub struct SurfaceTexture {
513 index: u32,
514 texture: Texture,
515 acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
516 present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
517}
518
519impl crate::DynSurfaceTexture for SurfaceTexture {}
520
521impl Borrow<Texture> for SurfaceTexture {
522 fn borrow(&self) -> &Texture {
523 &self.texture
524 }
525}
526
527impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
528 fn borrow(&self) -> &dyn crate::DynTexture {
529 &self.texture
530 }
531}
532
533pub struct Adapter {
534 raw: vk::PhysicalDevice,
535 instance: Arc<InstanceShared>,
536 //queue_families: Vec<vk::QueueFamilyProperties>,
537 known_memory_flags: vk::MemoryPropertyFlags,
538 phd_capabilities: adapter::PhysicalDeviceProperties,
539 phd_features: PhysicalDeviceFeatures,
540 downlevel_flags: wgt::DownlevelFlags,
541 private_caps: PrivateCapabilities,
542 workarounds: Workarounds,
543}
544
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of an extension come from.
enum ExtensionFn<T> {
    /// The extension's loaded function pointer struct.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan, so the
    /// functions on `ash`'s `DeviceV1_x` traits should be used instead.
    Promoted,
}
552
553struct DeviceExtensionFunctions {
554 debug_utils: Option<ext::debug_utils::Device>,
555 draw_indirect_count: Option<khr::draw_indirect_count::Device>,
556 timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
557 ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
558 mesh_shading: Option<ext::mesh_shader::Device>,
559}
560
561struct RayTracingDeviceExtensionFunctions {
562 acceleration_structure: khr::acceleration_structure::Device,
563 buffer_device_address: khr::buffer_device_address::Device,
564}
565
566/// Set of internal capabilities, which don't show up in the exposed
567/// device geometry, but affect the code paths taken internally.
568#[derive(Clone, Debug)]
569struct PrivateCapabilities {
570 image_view_usage: bool,
571 timeline_semaphores: bool,
572 texture_d24: bool,
573 texture_d24_s8: bool,
574 texture_s8: bool,
575 /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
576 can_present: bool,
577 non_coherent_map_mask: wgt::BufferAddress,
578 multi_draw_indirect: bool,
579
580 /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
581 ///
582 /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
583 /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
584 /// a given bindgroup binding outside that binding's [accessible
585 /// region][ar]. Enabling `robustBufferAccess` does ensure that
586 /// out-of-bounds reads and writes are not undefined behavior (that's good),
587 /// but still permits out-of-bounds reads to return data from anywhere
588 /// within the buffer, not just the accessible region.
589 ///
590 /// [ar]: ../struct.BufferBinding.html#accessible-region
591 /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
592 robust_buffer_access: bool,
593
594 robust_image_access: bool,
595
596 /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
597 /// [`robustBufferAccess2`] feature.
598 ///
599 /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
600 /// shader accesses to buffer contents. If this feature is not available,
601 /// this backend must have Naga inject bounds checks in the generated
602 /// SPIR-V.
603 ///
604 /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
605 /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
606 /// [ar]: ../struct.BufferBinding.html#accessible-region
607 robust_buffer_access2: bool,
608
609 robust_image_access2: bool,
610 zero_initialize_workgroup_memory: bool,
611 image_format_list: bool,
612 maximum_samplers: u32,
613
614 /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
615 /// (promoted to Vulkan 1.3).
616 ///
617 /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
618 ///
619 /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
620 shader_integer_dot_product: bool,
621
622 /// True if this adapter supports 8-bit integers provided by the
623 /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
624 ///
625 /// Allows shaders to declare the "Int8" capability. Note, however, that this
626 /// feature alone allows the use of 8-bit integers "only in the `Private`,
627 /// `Workgroup` (for non-Block variables), and `Function` storage classes"
628 /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
629 /// `StorageBuffer`), you also need to enable the corresponding feature in
630 /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
631 /// capability (e.g., `StorageBuffer8BitAccess`).
632 ///
633 /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
634 /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
635 shader_int8: bool,
636}
637
638bitflags::bitflags!(
639 /// Workaround flags.
640 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
641 pub struct Workarounds: u32 {
642 /// Only generate SPIR-V for one entry point at a time.
643 const SEPARATE_ENTRY_POINTS = 0x1;
644 /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
645 /// to a subpass resolve attachment array. This nulls out that pointer in that case.
646 const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
647 /// If the following code returns false, then nvidia will end up filling the wrong range.
648 ///
649 /// ```skip
650 /// fn nvidia_succeeds() -> bool {
651 /// # let (copy_length, start_offset) = (0, 0);
652 /// if copy_length >= 4096 {
653 /// if start_offset % 16 != 0 {
654 /// if copy_length == 4096 {
655 /// return true;
656 /// }
657 /// if copy_length % 16 == 0 {
658 /// return false;
659 /// }
660 /// }
661 /// }
662 /// true
663 /// }
664 /// ```
665 ///
666 /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
667 /// if they cover a range of 4096 bytes or more.
668 const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
669 }
670);
671
672#[derive(Clone, Debug, Eq, Hash, PartialEq)]
673struct AttachmentKey {
674 format: vk::Format,
675 layout: vk::ImageLayout,
676 ops: crate::AttachmentOps,
677}
678
679impl AttachmentKey {
680 /// Returns an attachment key for a compatible attachment.
681 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
682 Self {
683 format,
684 layout,
685 ops: crate::AttachmentOps::all(),
686 }
687 }
688}
689
690#[derive(Clone, Eq, Hash, PartialEq)]
691struct ColorAttachmentKey {
692 base: AttachmentKey,
693 resolve: Option<AttachmentKey>,
694}
695
696#[derive(Clone, Eq, Hash, PartialEq)]
697struct DepthStencilAttachmentKey {
698 base: AttachmentKey,
699 stencil_ops: crate::AttachmentOps,
700}
701
702#[derive(Clone, Eq, Default, Hash, PartialEq)]
703struct RenderPassKey {
704 colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
705 depth_stencil: Option<DepthStencilAttachmentKey>,
706 sample_count: u32,
707 multiview: Option<NonZeroU32>,
708}
709
710struct DeviceShared {
711 raw: ash::Device,
712 family_index: u32,
713 queue_index: u32,
714 raw_queue: vk::Queue,
715 drop_guard: Option<crate::DropGuard>,
716 instance: Arc<InstanceShared>,
717 physical_device: vk::PhysicalDevice,
718 enabled_extensions: Vec<&'static CStr>,
719 extension_fns: DeviceExtensionFunctions,
720 vendor_id: u32,
721 pipeline_cache_validation_key: [u8; 16],
722 timestamp_period: f32,
723 private_caps: PrivateCapabilities,
724 workarounds: Workarounds,
725 features: wgt::Features,
726 render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
727 sampler_cache: Mutex<sampler::SamplerCache>,
728 memory_allocations_counter: InternalCounter,
729
730 /// Because we have cached framebuffers which are not deleted from until
731 /// the device is destroyed, if the implementation of vulkan re-uses handles
732 /// we need some way to differentiate between the old handle and the new handle.
733 /// This factory allows us to have a dedicated identity value for each texture.
734 texture_identity_factory: ResourceIdentityFactory<vk::Image>,
735 /// As above, for texture views.
736 texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
737}
738
739impl Drop for DeviceShared {
740 fn drop(&mut self) {
741 for &raw in self.render_passes.lock().values() {
742 unsafe { self.raw.destroy_render_pass(raw, None) };
743 }
744 if self.drop_guard.is_none() {
745 unsafe { self.raw.destroy_device(None) };
746 }
747 }
748}
749
750pub struct Device {
751 shared: Arc<DeviceShared>,
752 mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
753 desc_allocator:
754 Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
755 valid_ash_memory_types: u32,
756 naga_options: naga::back::spv::Options<'static>,
757 #[cfg(feature = "renderdoc")]
758 render_doc: crate::auxil::renderdoc::RenderDoc,
759 counters: Arc<wgt::HalCounters>,
760}
761
762impl Drop for Device {
763 fn drop(&mut self) {
764 unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
765 unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
766 }
767}
768
769/// Semaphores for forcing queue submissions to run in order.
770///
771/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
772/// ordered, then the first submission will finish on the GPU before the second
773/// submission begins. To get this behavior on Vulkan we need to pass semaphores
774/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
775/// and to signal when their execution is done.
776///
777/// Normally this can be done with a single semaphore, waited on and then
778/// signalled for each submission. At any given time there's exactly one
779/// submission that would signal the semaphore, and exactly one waiting on it,
780/// as Vulkan requires.
781///
782/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
783/// hang if we use a single semaphore. The workaround is to alternate between
784/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
785/// the workaround until, say, Oct 2026.
786///
787/// [`wgpu_hal::Queue`]: crate::Queue
788/// [`submit`]: crate::Queue::submit
789/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
790/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
791#[derive(Clone)]
792struct RelaySemaphores {
793 /// The semaphore the next submission should wait on before beginning
794 /// execution on the GPU. This is `None` for the first submission, which
795 /// should not wait on anything at all.
796 wait: Option<vk::Semaphore>,
797
798 /// The semaphore the next submission should signal when it has finished
799 /// execution on the GPU.
800 signal: vk::Semaphore,
801}
802
803impl RelaySemaphores {
804 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
805 Ok(Self {
806 wait: None,
807 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
808 })
809 }
810
811 /// Advances the semaphores, returning the semaphores that should be used for a submission.
812 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
813 let old = self.clone();
814
815 // Build the state for the next submission.
816 match self.wait {
817 None => {
818 // The `old` values describe the first submission to this queue.
819 // The second submission should wait on `old.signal`, and then
820 // signal a new semaphore which we'll create now.
821 self.wait = Some(old.signal);
822 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
823 }
824 Some(ref mut wait) => {
825 // What this submission signals, the next should wait.
826 mem::swap(wait, &mut self.signal);
827 }
828 };
829
830 Ok(old)
831 }
832
833 /// Destroys the semaphores.
834 unsafe fn destroy(&self, device: &ash::Device) {
835 unsafe {
836 if let Some(wait) = self.wait {
837 device.destroy_semaphore(wait, None);
838 }
839 device.destroy_semaphore(self.signal, None);
840 }
841 }
842}
843
/// A Vulkan queue together with the state needed to submit to and present
/// from it.
pub struct Queue {
    /// The raw Vulkan queue handle; exposed through [`Queue::as_raw`].
    raw: vk::Queue,
    /// `VK_KHR_swapchain` device-level functions, used for `vkQueuePresentKHR`.
    swapchain_fn: khr::swapchain::Device,
    /// Shared device state this queue belongs to.
    device: Arc<DeviceShared>,
    // NOTE(review): presumably the queue family index `raw` was retrieved
    // from — confirm against the code that constructs `Queue`.
    family_index: u32,
    /// Semaphore pair used to chain each submission after the previous one.
    relay_semaphores: Mutex<RelaySemaphores>,
    /// Extra semaphores registered through `add_signal_semaphore`; they are
    /// moved into the signal list of the next `submit`.
    signal_semaphores: Mutex<SemaphoreList>,
}

impl Queue {
    /// Returns the raw Vulkan queue handle.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
858
859impl Drop for Queue {
860 fn drop(&mut self) {
861 unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
862 }
863}
864#[derive(Debug)]
865enum BufferMemoryBacking {
866 Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
867 VulkanMemory {
868 memory: vk::DeviceMemory,
869 offset: u64,
870 size: u64,
871 },
872}
873impl BufferMemoryBacking {
874 fn memory(&self) -> &vk::DeviceMemory {
875 match self {
876 Self::Managed(m) => m.memory(),
877 Self::VulkanMemory { memory, .. } => memory,
878 }
879 }
880 fn offset(&self) -> u64 {
881 match self {
882 Self::Managed(m) => m.offset(),
883 Self::VulkanMemory { offset, .. } => *offset,
884 }
885 }
886 fn size(&self) -> u64 {
887 match self {
888 Self::Managed(m) => m.size(),
889 Self::VulkanMemory { size, .. } => *size,
890 }
891 }
892}
893#[derive(Debug)]
894pub struct Buffer {
895 raw: vk::Buffer,
896 block: Option<Mutex<BufferMemoryBacking>>,
897}
898impl Buffer {
899 /// # Safety
900 ///
901 /// - `vk_buffer`'s memory must be managed by the caller
902 /// - Externally imported buffers can't be mapped by `wgpu`
903 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
904 Self {
905 raw: vk_buffer,
906 block: None,
907 }
908 }
909 /// # Safety
910 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
911 /// - Externally imported buffers can't be mapped by `wgpu`
912 /// - `offset` and `size` must be valid with the allocation of `memory`
913 pub unsafe fn from_raw_managed(
914 vk_buffer: vk::Buffer,
915 memory: vk::DeviceMemory,
916 offset: u64,
917 size: u64,
918 ) -> Self {
919 Self {
920 raw: vk_buffer,
921 block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
922 memory,
923 offset,
924 size,
925 })),
926 }
927 }
928}
929
930impl crate::DynBuffer for Buffer {}
931
/// A Vulkan acceleration structure together with the buffer and memory block
/// stored alongside it.
#[derive(Debug)]
pub struct AccelerationStructure {
    /// The raw `VkAccelerationStructureKHR` handle.
    raw: vk::AccelerationStructureKHR,
    // NOTE(review): presumably the buffer that stores the acceleration
    // structure's data — confirm against the creation code.
    buffer: vk::Buffer,
    // NOTE(review): presumably the allocation bound to `buffer` — confirm.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    // NOTE(review): judging by the name, a query pool used to read back the
    // compacted size, present only when that is requested — confirm.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
941
/// A Vulkan image plus the bookkeeping wgpu-hal keeps for it.
#[derive(Debug)]
pub struct Texture {
    /// The raw `VkImage` handle; exposed through [`Texture::raw_handle`].
    raw: vk::Image,
    // NOTE(review): presumably set for externally created images so the owner
    // is notified on drop — confirm against the texture destruction code.
    drop_guard: Option<crate::DropGuard>,
    // NOTE(review): presumably raw memory imported from outside the
    // allocator — confirm.
    external_memory: Option<vk::DeviceMemory>,
    // NOTE(review): presumably the allocator block bound to the image, when
    // wgpu-hal allocated it — confirm.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// The wgpu format of the texture.
    format: wgt::TextureFormat,
    // NOTE(review): presumably the full extent usable for copies — confirm.
    copy_size: crate::CopyExtent,
    /// Unique identity of this image, used in hash-map keys because raw
    /// Vulkan handles are not permanently unique.
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// Returns the raw `VkImage` handle of this texture.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
963
964#[derive(Debug)]
965pub struct TextureView {
966 raw_texture: vk::Image,
967 raw: vk::ImageView,
968 layers: NonZeroU32,
969 format: wgt::TextureFormat,
970 raw_format: vk::Format,
971 base_mip_level: u32,
972 dimension: wgt::TextureViewDimension,
973 texture_identity: ResourceIdentity<vk::Image>,
974 view_identity: ResourceIdentity<vk::ImageView>,
975}
976
977impl crate::DynTextureView for TextureView {}
978
979impl TextureView {
980 /// # Safety
981 ///
982 /// - The image view handle must not be manually destroyed
983 pub unsafe fn raw_handle(&self) -> vk::ImageView {
984 self.raw
985 }
986
987 /// Returns the raw texture view, along with its identity.
988 fn identified_raw_view(&self) -> IdentifiedTextureView {
989 IdentifiedTextureView {
990 raw: self.raw,
991 identity: self.view_identity,
992 }
993 }
994}
995
/// A Vulkan sampler.
#[derive(Debug)]
pub struct Sampler {
    /// The raw `VkSampler` handle.
    raw: vk::Sampler,
    // NOTE(review): presumably the create info `raw` was built from, kept so
    // the sampler can be looked up or deduplicated later — confirm.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1003
/// A Vulkan descriptor set layout plus the descriptor counts derived from it.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// The raw `VkDescriptorSetLayout` handle.
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts, in the form the `gpu_descriptor` allocator uses.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    // NOTE(review): presumably one `(descriptor type, count)` pair per
    // binding — confirm against the layout creation code.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1014
/// A Vulkan pipeline layout.
#[derive(Debug)]
pub struct PipelineLayout {
    /// The raw `VkPipelineLayout` handle.
    raw: vk::PipelineLayout,
    // NOTE(review): presumably the binding-array information handed to naga's
    // SPIR-V backend when compiling shaders for this layout — confirm.
    binding_arrays: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1022
/// A bind group, represented by a descriptor set from `gpu_descriptor`.
#[derive(Debug)]
pub struct BindGroup {
    /// The descriptor set wrapping the raw `VkDescriptorSet`.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1029
1030/// Miscellaneous allocation recycling pool for `CommandAllocator`.
1031#[derive(Default)]
1032struct Temp {
1033 marker: Vec<u8>,
1034 buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
1035 image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
1036}
1037
1038impl Temp {
1039 fn clear(&mut self) {
1040 self.marker.clear();
1041 self.buffer_barriers.clear();
1042 self.image_barriers.clear();
1043 }
1044
1045 fn make_c_str(&mut self, name: &str) -> &CStr {
1046 self.marker.clear();
1047 self.marker.extend_from_slice(name.as_bytes());
1048 self.marker.push(0);
1049 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1050 }
1051}
1052
/// Hands out unique IDs for resources of type `T`.
///
/// Raw vk handles may be reused over time, so they cannot serve as permanent
/// keys; IDs produced here are never repeated by the same factory.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is `0`.
    fn new() -> Self {
        #[cfg(not(target_has_atomic = "64"))]
        let next_id = Mutex::new(0);
        #[cfg(target_has_atomic = "64")]
        let next_id = core::sync::atomic::AtomicU64::new(0);

        Self {
            next_id,
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        // Fallback for targets without 64-bit atomics: bump the counter under
        // the lock and hand out the previous value.
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let issued = *counter;
            *counter += 1;
            issued
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// Used as a hashable key for resources; unlike a raw handle, an identity is
/// permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1108
1109#[derive(Clone, Eq, Hash, PartialEq)]
1110struct FramebufferKey {
1111 raw_pass: vk::RenderPass,
1112 /// Because this is used as a key in a hash map, we need to include the identity
1113 /// so that this hashes differently, even if the ImageView handles are the same
1114 /// between different views.
1115 attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
1116 /// While this is redundant for calculating the hash, we need access to an array
1117 /// of all the raw ImageViews when we are creating the actual framebuffer,
1118 /// so we store this here.
1119 attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
1120 extent: wgt::Extent3d,
1121}
1122
1123impl FramebufferKey {
1124 fn push_view(&mut self, view: IdentifiedTextureView) {
1125 self.attachment_identities.push(view.identity);
1126 self.attachment_views.push(view.raw);
1127 }
1128}
1129
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// The raw `VkImageView` handle.
    raw: vk::ImageView,
    /// The unique identity of the view `raw` refers to.
    identity: ResourceIdentity<vk::ImageView>,
}
1136
/// Hash-map key for the temporary texture views cached on a `CommandEncoder`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// The raw image the temporary view refers to.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    /// The Vulkan format of the view.
    format: vk::Format,
    /// The mip level the view selects.
    mip_level: u32,
    /// The depth slice the view selects.
    depth_slice: u32,
}
1148
/// A Vulkan command pool plus the command buffers and caches built on top of it.
pub struct CommandEncoder {
    /// The command pool that every command buffer of this encoder is
    /// allocated from. Destroyed when the encoder is dropped.
    raw: vk::CommandPool,
    /// Shared device state used to issue Vulkan calls.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers cached by this encoder; destroyed when the encoder is dropped.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views cached by this encoder; destroyed when the
    /// encoder is dropped.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// Shared HAL counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1190
1191impl Drop for CommandEncoder {
1192 fn drop(&mut self) {
1193 // SAFETY:
1194 //
1195 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1196 // `CommandBuffer` must live until its execution is complete, and that a
1197 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1198 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1199 // state.
1200 //
1201 // The other VUIDs are pretty obvious.
1202 unsafe {
1203 // `vkDestroyCommandPool` also frees any command buffers allocated
1204 // from that pool, so there's no need to explicitly call
1205 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1206 // fields.
1207 self.device.raw.destroy_command_pool(self.raw, None);
1208 }
1209
1210 for (_, fb) in self.framebuffers.drain() {
1211 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1212 }
1213
1214 for (_, view) in self.temp_texture_views.drain() {
1215 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1216 }
1217
1218 self.counters.command_encoders.sub(1);
1219 }
1220}
1221
impl CommandEncoder {
    /// Returns the command buffer currently held in `active`.
    ///
    /// This is the buffer being recorded; per the `active` field's contract
    /// the handle may be null when the encoder is not recording.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1230
1231impl fmt::Debug for CommandEncoder {
1232 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1233 f.debug_struct("CommandEncoder")
1234 .field("raw", &self.raw)
1235 .finish()
1236 }
1237}
1238
/// A recorded Vulkan command buffer, ready to be handed to `Queue::submit`.
#[derive(Debug)]
pub struct CommandBuffer {
    /// The raw `VkCommandBuffer` handle.
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1245
/// A shader module: either an already created Vulkan module, or a naga
/// module kept in intermediate form.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// A raw `VkShaderModule` handle.
    Raw(vk::ShaderModule),
    /// A naga shader that has not been turned into a `VkShaderModule` yet.
    // NOTE(review): presumably compiled to SPIR-V at pipeline creation time —
    // confirm against the device code.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1257
/// A Vulkan graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    /// The raw `VkPipeline` handle.
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}
1264
/// A Vulkan compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    /// The raw `VkPipeline` handle.
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}
1271
/// A Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    /// The raw `VkPipelineCache` handle.
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}
1278
/// A set of queries, backed by a Vulkan query pool.
#[derive(Debug)]
pub struct QuerySet {
    /// The raw `VkQueryPool` handle.
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1285
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest fence value known to have completed as of the last
        /// call to `maintain`.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Fences that have been reset and can be reused by the next submission.
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1336
1337impl Fence {
1338 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1339 ///
1340 /// As an optimization, assume that we already know that the fence has
1341 /// reached `last_completed`, and don't bother checking fences whose values
1342 /// are less than that: those fences remain in the `active` array only
1343 /// because we haven't called `maintain` yet to clean them up.
1344 ///
1345 /// [`FenceValue`]: crate::FenceValue
1346 fn check_active(
1347 device: &ash::Device,
1348 mut last_completed: crate::FenceValue,
1349 active: &[(crate::FenceValue, vk::Fence)],
1350 ) -> Result<crate::FenceValue, crate::DeviceError> {
1351 for &(value, raw) in active.iter() {
1352 unsafe {
1353 if value > last_completed
1354 && device
1355 .get_fence_status(raw)
1356 .map_err(map_host_device_oom_and_lost_err)?
1357 {
1358 last_completed = value;
1359 }
1360 }
1361 }
1362 Ok(last_completed)
1363 }
1364
1365 /// Return the highest signalled [`FenceValue`] for `self`.
1366 ///
1367 /// [`FenceValue`]: crate::FenceValue
1368 fn get_latest(
1369 &self,
1370 device: &ash::Device,
1371 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1372 ) -> Result<crate::FenceValue, crate::DeviceError> {
1373 match *self {
1374 Self::TimelineSemaphore(raw) => unsafe {
1375 Ok(match *extension.unwrap() {
1376 ExtensionFn::Extension(ref ext) => ext
1377 .get_semaphore_counter_value(raw)
1378 .map_err(map_host_device_oom_and_lost_err)?,
1379 ExtensionFn::Promoted => device
1380 .get_semaphore_counter_value(raw)
1381 .map_err(map_host_device_oom_and_lost_err)?,
1382 })
1383 },
1384 Self::FencePool {
1385 last_completed,
1386 ref active,
1387 free: _,
1388 } => Self::check_active(device, last_completed, active),
1389 }
1390 }
1391
1392 /// Trim the internal state of this [`Fence`].
1393 ///
1394 /// This function has no externally visible effect, but you should call it
1395 /// periodically to keep this fence's resource consumption under control.
1396 ///
1397 /// For fences using the [`FencePool`] implementation, this function
1398 /// recycles fences that have been signaled. If you don't call this,
1399 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1400 /// time it's called.
1401 ///
1402 /// [`FencePool`]: Fence::FencePool
1403 /// [`Queue::submit`]: crate::Queue::submit
1404 fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1405 match *self {
1406 Self::TimelineSemaphore(_) => {}
1407 Self::FencePool {
1408 ref mut last_completed,
1409 ref mut active,
1410 ref mut free,
1411 } => {
1412 let latest = Self::check_active(device, *last_completed, active)?;
1413 let base_free = free.len();
1414 for &(value, raw) in active.iter() {
1415 if value <= latest {
1416 free.push(raw);
1417 }
1418 }
1419 if free.len() != base_free {
1420 active.retain(|&(value, _)| value > latest);
1421 unsafe { device.reset_fences(&free[base_free..]) }
1422 .map_err(map_device_oom_err)?
1423 }
1424 *last_completed = latest;
1425 }
1426 }
1427 Ok(())
1428 }
1429}
1430
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` to the Vulkan queue with a single
    /// `vkQueueSubmit` call.
    ///
    /// The submission:
    /// - waits on the acquire semaphore of each surface texture that provides
    ///   one, and on the relay semaphore signaled by the previous submission;
    /// - signals a present semaphore per surface texture, any semaphores
    ///   registered through `add_signal_semaphore`, the next relay semaphore,
    ///   and `signal_fence` with `signal_value`.
    ///
    /// # Errors
    ///
    /// Returns a `DeviceError` if creating a semaphore or fence, maintaining
    /// the fence, or the submit call itself fails.
    ///
    /// # Panics
    ///
    /// Panics if the semaphores of a surface texture are already locked, which
    /// happens when the same surface texture is passed more than once.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null when the fence is a timeline semaphore; only the
        // `FencePool` path supplies a real `VkFence` to the submit call.
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Lock every surface texture's semaphores up front; the guards are
        // consumed by the loop below.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Move the externally registered signal semaphores into this
        // submission. The guard stays alive until the end of the function.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence when one is available, otherwise
                // create a new one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage for the timeline-semaphore info that `add_to_submit` may
        // attach to `vk_info`; it must outlive the submit call.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` on `surface` with `vkQueuePresentKHR`, waiting on
    /// the texture's present semaphores.
    ///
    /// If the swapchain has a `next_present_time` set, it is consumed and
    /// attached to the present call as a `VkPresentTimesInfoGOOGLE`.
    ///
    /// # Errors
    ///
    /// Returns `SurfaceError::Outdated` or `SurfaceError::Lost` for the
    /// corresponding Vulkan results, and a mapped device error otherwise.
    ///
    /// # Panics
    ///
    /// Panics if `surface` has no swapchain.
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        // Declared outside the `if` so the values chained into `vk_info`
        // live until the present call.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the timestamp period stored on the shared device state.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1632
1633impl Queue {
1634 pub fn raw_device(&self) -> &ash::Device {
1635 &self.device.raw
1636 }
1637
1638 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1639 let mut guard = self.signal_semaphores.lock();
1640 if let Some(value) = semaphore_value {
1641 guard.push_timeline(semaphore, value);
1642 } else {
1643 guard.push_binary(semaphore);
1644 }
1645 }
1646}
1647
1648/// Maps
1649///
1650/// - VK_ERROR_OUT_OF_HOST_MEMORY
1651/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1652fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1653 match err {
1654 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1655 get_oom_err(err)
1656 }
1657 e => get_unexpected_err(e),
1658 }
1659}
1660
1661/// Maps
1662///
1663/// - VK_ERROR_OUT_OF_HOST_MEMORY
1664/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1665/// - VK_ERROR_DEVICE_LOST
1666fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1667 match err {
1668 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1669 other => map_host_device_oom_err(other),
1670 }
1671}
1672
/// Maps the results a call may return when these are possible:
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last one is not handled specially (see below), so this simply
/// delegates to [`map_host_device_oom_err`].
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1683
1684/// Maps
1685///
1686/// - VK_ERROR_OUT_OF_HOST_MEMORY
1687fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1688 match err {
1689 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1690 e => get_unexpected_err(e),
1691 }
1692}
1693
1694/// Maps
1695///
1696/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1697fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1698 match err {
1699 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1700 e => get_unexpected_err(e),
1701 }
1702}
1703
/// Maps the results a call may return when these are possible:
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last one is not handled specially (see below), so this simply
/// delegates to [`map_host_oom_err`].
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1713
/// Maps the results pipeline creation may return:
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// The last two are not handled specially (see below), so this simply
/// delegates to [`map_host_device_oom_err`].
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1727
1728/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
1729/// feature flag is enabled.
1730fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
1731 #[cfg(feature = "internal_error_panic")]
1732 panic!("Unexpected Vulkan error: {_err:?}");
1733
1734 #[allow(unreachable_code)]
1735 crate::DeviceError::Unexpected
1736}
1737
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The Vulkan result is accepted for symmetry with the other helpers but is
/// not inspected.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1742
1743/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
1744/// feature flag is enabled.
1745fn get_lost_err() -> crate::DeviceError {
1746 #[cfg(feature = "device_lost_panic")]
1747 panic!("Device lost");
1748
1749 #[allow(unreachable_code)]
1750 crate::DeviceError::Lost
1751}
1752
/// Plain-old-data record describing one top-level acceleration structure
/// instance, laid out with `#[repr(C)]` so it can be copied as raw bytes.
// NOTE(review): the layout presumably mirrors `VkAccelerationStructureInstanceKHR`
// — confirm against the code that writes these records.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    // NOTE(review): presumably a row-major 3x4 transform matrix — confirm.
    transform: [f32; 12],
    /// Custom data and visibility mask packed into one `u32`.
    custom_data_and_mask: u32,
    /// Shader binding table record offset and instance flags packed into one `u32`.
    shader_binding_table_record_offset_and_flags: u32,
    // NOTE(review): presumably the device address of the referenced
    // bottom-level acceleration structure — confirm.
    acceleration_structure_reference: u64,
}
1761
/// Arguments to the [`CreateDeviceCallback`].
///
/// Every field is a mutable borrow of state that is used to create the
/// Vulkan device, so the callback adjusts that state in place.
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1783
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1793
/// Arguments to the [`CreateInstanceCallback`].
///
/// The `extensions` and `create_info` fields are mutable borrows of state
/// used to create the Vulkan instance, so the callback adjusts them in place.
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1813
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;