wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
Ash expects slices, which we don't generally have available.
We cope with this requirement by combining the following approaches:
 - temporarily allocating a `Vec` on the heap, where the overhead is permitted
 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
15Framebuffers are also cached on the device, but they are removed when
16any of the image views (they have) gets removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
52const MILLIS_TO_NANOS: u64 = 1_000_000;
53const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
54
/// Marker type that selects the Vulkan backend.
///
/// Carries no data; its [`crate::Api`] implementation names the concrete
/// Vulkan type used for each kind of `wgpu-hal` resource.
#[derive(Clone, Debug)]
pub struct Api;
57
/// Maps every associated type of [`crate::Api`] to the Vulkan type of the
/// same name declared in this module.
impl crate::Api for Api {
    /// Identifies this backend as Vulkan.
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    // Entry points: instance, surface, adapter and device.
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    // Command recording and submission.
    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    // Resources.
    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    // Binding model, shaders and pipelines.
    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
87
// Pass every Vulkan resource type to `impl_dyn_resource!`.
// NOTE(review): presumably this implements the crate's dynamic-dispatch
// resource trait for each listed type; confirm against the macro definition.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
112
/// State kept alive for an instance's debug utils messenger.
struct DebugUtils {
    /// Loaded instance-level functions of the debug utils extension.
    extension: ext::debug_utils::Instance,
    /// Handle of the debug messenger.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    // Held only for ownership: Rust code in this struct never reads it, hence
    // the `dead_code` allowance.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}
125
/// Parameters gathered for creating a debug utils messenger.
pub struct DebugUtilsCreateInfo {
    /// Message severity flags for the messenger.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message type flags for the messenger.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data for the messenger callback; boxed so that it has a stable
    /// heap address (see [`DebugUtilsMessengerUserData`]).
    callback_data: Box<DebugUtilsMessengerUserData>,
}
131
#[derive(Debug)]
/// Properties of the validation layer that the debug utils messenger
/// callback needs in order to apply its workarounds.
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}
142
/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if it is present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version of
    /// their layer, so there's no reason to store the version.
    has_obs_layer: bool,
}
156
/// Instance state shared (through `Arc`) by the instance, its surfaces,
/// adapters and devices.
pub struct InstanceShared {
    /// The raw `ash` instance.
    raw: ash::Instance,
    // NOTE(review): presumably the instance extensions enabled on `raw` — confirm.
    extensions: Vec<&'static CStr>,
    // NOTE(review): presumably `Some` when `raw` is owned externally — confirm
    // against `InstanceShared::drop`.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if a messenger was created.
    debug_utils: Option<DebugUtils>,
    /// Loaded functions of `VK_KHR_get_physical_device_properties2`, if any.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    /// The `ash` entry point.
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}
176
/// Vulkan instance: a handle to the reference-counted [`InstanceShared`].
pub struct Instance {
    /// State shared with the objects created from this instance.
    shared: Arc<InstanceShared>,
}
180
/// Semaphore used to acquire a swapchain image.
///
/// Besides the semaphore itself, this tracks whether it still has to be
/// waited on and which submission last used it.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
229
230impl SwapchainAcquireSemaphore {
231 fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
232 Ok(Self {
233 acquire: device
234 .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
235 should_wait_for_acquire: true,
236 previously_used_submission_index: 0,
237 })
238 }
239
240 /// Sets the fence value which the next acquire will wait for. This prevents
241 /// the semaphore from being used while the previous submission is still in flight.
242 fn set_used_fence_value(&mut self, value: crate::FenceValue) {
243 self.previously_used_submission_index = value;
244 }
245
246 /// Return the semaphore that commands drawing to this image should wait for, if any.
247 ///
248 /// This only returns `Some` once per acquisition; see
249 /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
250 fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
251 if self.should_wait_for_acquire {
252 self.should_wait_for_acquire = false;
253 Some(self.acquire)
254 } else {
255 None
256 }
257 }
258
259 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
260 /// so reset internal state to be ready for the next frame.
261 fn end_semaphore_usage(&mut self) {
262 // Reset the acquire semaphore, so that the next time we acquire this
263 // image, we can wait for it again.
264 self.should_wait_for_acquire = true;
265 }
266
267 unsafe fn destroy(&self, device: &ash::Device) {
268 unsafe {
269 device.destroy_semaphore(self.acquire, None);
270 }
271 }
272}
273
/// Semaphores that order the presentation of one swapchain image after the
/// submissions that draw to it.
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] handed out to submissions since
    /// the last reset, i.e. the ones the next presentation has to wait on.
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}
320
321impl SwapchainPresentSemaphores {
322 pub fn new(frame_index: usize) -> Self {
323 Self {
324 present: Vec::new(),
325 present_index: 0,
326 frame_index,
327 }
328 }
329
330 /// Return the semaphore that the next submission that writes to this image should
331 /// signal when it's done.
332 ///
333 /// See [`SwapchainPresentSemaphores::present`] for details.
334 fn get_submit_signal_semaphore(
335 &mut self,
336 device: &DeviceShared,
337 ) -> Result<vk::Semaphore, crate::DeviceError> {
338 // Try to recycle a semaphore we created for a previous presentation.
339 let sem = match self.present.get(self.present_index) {
340 Some(sem) => *sem,
341 None => {
342 let sem = device.new_binary_semaphore(&format!(
343 "SwapchainImageSemaphore: Image {} present semaphore {}",
344 self.frame_index, self.present_index
345 ))?;
346 self.present.push(sem);
347 sem
348 }
349 };
350
351 self.present_index += 1;
352
353 Ok(sem)
354 }
355
356 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
357 /// so reset internal state to be ready for the next frame.
358 fn end_semaphore_usage(&mut self) {
359 // Reset the index to 0, so that the next time we get a semaphore, we
360 // start from the beginning of the list.
361 self.present_index = 0;
362 }
363
364 /// Return the semaphores that a presentation of this image should wait on.
365 ///
366 /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
367 /// ends this image's acquisition should wait for. See
368 /// [`SwapchainPresentSemaphores::present`] for details.
369 ///
370 /// Reset `self` to be ready for the next acquisition cycle.
371 ///
372 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
373 fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
374 self.present[0..self.present_index].to_vec()
375 }
376
377 unsafe fn destroy(&self, device: &ash::Device) {
378 unsafe {
379 for sem in &self.present {
380 device.destroy_semaphore(*sem, None);
381 }
382 }
383 }
384}
385
/// A configured swapchain together with the semaphores used to synchronize
/// image acquisition and presentation.
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Loaded device-level functions of the swapchain extension.
    functor: khr::swapchain::Device,
    /// The device this swapchain is associated with.
    device: Arc<DeviceShared>,
    // NOTE(review): presumably the images owned by `raw`, in swapchain image
    // index order — confirm where the swapchain is created.
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
438
439impl Swapchain {
440 /// Mark the current frame finished, advancing to the next acquire semaphore.
441 fn advance_acquire_semaphore(&mut self) {
442 let semaphore_count = self.acquire_semaphores.len();
443 self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
444 }
445
446 /// Get the next acquire semaphore that should be used with this swapchain.
447 fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
448 self.acquire_semaphores[self.next_acquire_index].clone()
449 }
450
451 /// Get the set of present semaphores that should be used with the given image index.
452 fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
453 self.present_semaphores[index as usize].clone()
454 }
455}
456
/// A Vulkan surface and, once configured, its swapchain.
pub struct Surface {
    /// The raw surface handle.
    raw: vk::SurfaceKHR,
    /// Loaded instance-level functions of the surface extension.
    functor: khr::surface::Instance,
    /// The instance this surface is associated with.
    instance: Arc<InstanceShared>,
    /// The current swapchain; `None` while the surface is not configured.
    swapchain: RwLock<Option<Swapchain>>,
}
463
464impl Surface {
465 /// Get the raw Vulkan swapchain associated with this surface.
466 ///
467 /// Returns [`None`] if the surface is not configured.
468 pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
469 let read = self.swapchain.read();
470 read.as_ref().map(|it| it.raw)
471 }
472
473 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
474 /// using [VK_GOOGLE_display_timing].
475 ///
476 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
477 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
478 ///
479 /// This can also be used to add a "not before" timestamp to the presentation.
480 ///
481 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
482 ///
483 /// # Panics
484 ///
485 /// - If the surface hasn't been configured.
486 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
487 ///
488 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
489 #[track_caller]
490 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
491 let mut swapchain = self.swapchain.write();
492 let swapchain = swapchain
493 .as_mut()
494 .expect("Surface should have been configured");
495 let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
496 if swapchain.device.features.contains(features) {
497 swapchain.next_present_time = Some(present_timing);
498 } else {
499 // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
500 panic!(
501 concat!(
502 "Tried to set display timing properties ",
503 "without the corresponding feature ({:?}) enabled."
504 ),
505 features
506 );
507 }
508 }
509}
510
/// A swapchain image, bundled with the semaphores used to synchronize its
/// acquisition and presentation.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// Index of this image within the swapchain.
    index: u32,
    /// The texture wrapping the swapchain image.
    texture: Texture,
    /// Shared handle to the acquire semaphore state
    /// (see [`SwapchainAcquireSemaphore`]).
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Shared handle to the present semaphore set
    /// (see [`SwapchainPresentSemaphores`]).
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}
518
// Marker implementation: the trait has no items to define here.
impl crate::DynSurfaceTexture for SurfaceTexture {}
520
521impl Borrow<Texture> for SurfaceTexture {
522 fn borrow(&self) -> &Texture {
523 &self.texture
524 }
525}
526
527impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
528 fn borrow(&self) -> &dyn crate::DynTexture {
529 &self.texture
530 }
531}
532
/// A Vulkan physical device together with the capabilities recorded for it.
pub struct Adapter {
    /// The raw physical device handle.
    raw: vk::PhysicalDevice,
    /// The instance this adapter is associated with.
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    // NOTE(review): presumably the memory property flags this backend
    // understands — confirm at the construction site.
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
}
544
// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
/// Where the functions of an extension are to be found: in the extension's
/// own function table, or in core Vulkan after promotion.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}
552
/// Function tables of the optional device extensions.
///
/// NOTE(review): presumably a field is `None` when the corresponding
/// extension is not enabled — confirm at the construction site.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    /// Either the extension's own table or a marker that the functionality was
    /// promoted to core; see [`ExtensionFn`].
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}
560
/// Function tables of the device extensions grouped under ray tracing support.
struct RayTracingDeviceExtensionFunctions {
    /// Functions of the acceleration structure extension.
    acceleration_structure: khr::acceleration_structure::Device,
    /// Functions of the buffer device address extension.
    buffer_device_address: khr::buffer_device_address::Device,
}
565
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    image_view_usage: bool,
    // NOTE(review): per the module docs, timeline semaphores are used 1:1 with
    // wgpu-hal fences when available; presumably this flag records that.
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
636
bitflags::bitflags!(
    /// Workaround flags.
    ///
    /// Each flag enables one driver-specific workaround in this backend.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
670
/// Description of a single attachment: the building block of the attachment
/// keys that make up a [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    /// Format of the attachment.
    format: vk::Format,
    /// Image layout of the attachment.
    layout: vk::ImageLayout,
    /// Attachment operations (see `crate::AttachmentOps`).
    ops: crate::AttachmentOps,
}
677
678impl AttachmentKey {
679 /// Returns an attachment key for a compatible attachment.
680 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
681 Self {
682 format,
683 layout,
684 ops: crate::AttachmentOps::all(),
685 }
686 }
687}
688
/// Key describing one color attachment of a render pass.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    /// The color attachment itself.
    base: AttachmentKey,
    /// The resolve attachment, if there is one.
    resolve: Option<AttachmentKey>,
}
694
/// Key describing the depth/stencil attachment of a render pass.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    /// The attachment itself.
    // NOTE(review): `base.ops` presumably holds the depth operations, given
    // that the stencil ones are stored separately below — confirm.
    base: AttachmentKey,
    /// Attachment operations for the stencil aspect.
    stencil_ops: crate::AttachmentOps,
}
700
/// Hashable description of a render pass; the key type of the render pass
/// cache in `DeviceShared::render_passes`.
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    /// Color attachments, at most `crate::MAX_COLOR_ATTACHMENTS` of them; an
    /// entry may be `None`.
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    /// Depth/stencil attachment, if any.
    depth_stencil: Option<DepthStencilAttachmentKey>,
    /// Sample count of the render pass.
    sample_count: u32,
    /// Multiview setting, if any.
    multiview: Option<NonZeroU32>,
}
708
/// Device state shared (through `Arc`) between the device, its queue and its
/// swapchains.
struct DeviceShared {
    /// The raw `ash` device.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: vk::Queue,
    /// When `None`, dropping this value destroys `raw` (see the `Drop` impl).
    // NOTE(review): presumably `Some` means the device is owned externally — confirm.
    drop_guard: Option<crate::DropGuard>,
    /// The instance this device is associated with.
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    /// Function tables of the optional device extensions.
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    timestamp_period: f32,
    /// Internal capabilities; see [`PrivateCapabilities`].
    private_caps: PrivateCapabilities,
    /// Driver workarounds to apply; see [`Workarounds`].
    workarounds: Workarounds,
    features: wgt::Features,
    /// Cache of render passes; its entries are destroyed when this value is
    /// dropped.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
737
738impl Drop for DeviceShared {
739 fn drop(&mut self) {
740 for &raw in self.render_passes.lock().values() {
741 unsafe { self.raw.destroy_render_pass(raw, None) };
742 }
743 if self.drop_guard.is_none() {
744 unsafe { self.raw.destroy_device(None) };
745 }
746 }
747}
748
/// A Vulkan logical device together with its allocators.
pub struct Device {
    /// State shared with the objects created from this device.
    shared: Arc<DeviceShared>,
    /// Allocator for device memory; cleaned up when the device is dropped.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Allocator for descriptor sets; cleaned up when the device is dropped.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    // NOTE(review): presumably a bit mask of usable memory type indices — confirm.
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    // Only present when the `renderdoc` feature is enabled.
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
760
761impl Drop for Device {
762 fn drop(&mut self) {
763 unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
764 unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
765 }
766}
767
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` is used by `advance` to hand out a snapshot of the current pair.
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
801
802impl RelaySemaphores {
803 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
804 Ok(Self {
805 wait: None,
806 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
807 })
808 }
809
810 /// Advances the semaphores, returning the semaphores that should be used for a submission.
811 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
812 let old = self.clone();
813
814 // Build the state for the next submission.
815 match self.wait {
816 None => {
817 // The `old` values describe the first submission to this queue.
818 // The second submission should wait on `old.signal`, and then
819 // signal a new semaphore which we'll create now.
820 self.wait = Some(old.signal);
821 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
822 }
823 Some(ref mut wait) => {
824 // What this submission signals, the next should wait.
825 mem::swap(wait, &mut self.signal);
826 }
827 };
828
829 Ok(old)
830 }
831
832 /// Destroys the semaphores.
833 unsafe fn destroy(&self, device: &ash::Device) {
834 unsafe {
835 if let Some(wait) = self.wait {
836 device.destroy_semaphore(wait, None);
837 }
838 device.destroy_semaphore(self.signal, None);
839 }
840 }
841}
842
/// A Vulkan queue and the state used to order its submissions.
pub struct Queue {
    /// The raw queue handle; exposed through `Queue::as_raw`.
    raw: vk::Queue,
    /// Loaded device-level functions of the swapchain extension.
    swapchain_fn: khr::swapchain::Device,
    /// The device this queue is associated with.
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Semaphores chaining consecutive submissions; see [`RelaySemaphores`].
    /// Destroyed when the queue is dropped.
    relay_semaphores: Mutex<RelaySemaphores>,
    // NOTE(review): presumably extra semaphores for the next submission to
    // signal — confirm against `Queue::submit`.
    signal_semaphores: Mutex<SemaphoreList>,
}
851
impl Queue {
    /// Returns the raw `vk::Queue` handle wrapped by this queue.
    pub fn as_raw(&self) -> vk::Queue {
        self.raw
    }
}
857
858impl Drop for Queue {
859 fn drop(&mut self) {
860 unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
861 }
862}
/// The memory backing a [`Buffer`].
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block handed out by `gpu_alloc`.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A range of a raw Vulkan memory object, as supplied to
    /// `Buffer::from_raw_managed`.
    VulkanMemory {
        /// The raw memory object.
        memory: vk::DeviceMemory,
        /// Offset of the range within `memory`.
        offset: u64,
        /// Size of the range.
        size: u64,
    },
}
872impl BufferMemoryBacking {
873 fn memory(&self) -> &vk::DeviceMemory {
874 match self {
875 Self::Managed(m) => m.memory(),
876 Self::VulkanMemory { memory, .. } => memory,
877 }
878 }
879 fn offset(&self) -> u64 {
880 match self {
881 Self::Managed(m) => m.offset(),
882 Self::VulkanMemory { offset, .. } => *offset,
883 }
884 }
885 fn size(&self) -> u64 {
886 match self {
887 Self::Managed(m) => m.size(),
888 Self::VulkanMemory { size, .. } => *size,
889 }
890 }
891}
/// The Vulkan implementation of [`crate::Api::Buffer`].
#[derive(Debug)]
pub struct Buffer {
    /// Raw Vulkan buffer handle.
    raw: vk::Buffer,
    /// Memory backing the buffer. `None` when the buffer was created with
    /// `Buffer::from_raw`, i.e. when the caller manages the memory.
    block: Option<Mutex<BufferMemoryBacking>>,
}
897impl Buffer {
898 /// # Safety
899 ///
900 /// - `vk_buffer`'s memory must be managed by the caller
901 /// - Externally imported buffers can't be mapped by `wgpu`
902 pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
903 Self {
904 raw: vk_buffer,
905 block: None,
906 }
907 }
908 /// # Safety
909 /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
910 /// - Externally imported buffers can't be mapped by `wgpu`
911 /// - `offset` and `size` must be valid with the allocation of `memory`
912 pub unsafe fn from_raw_managed(
913 vk_buffer: vk::Buffer,
914 memory: vk::DeviceMemory,
915 offset: u64,
916 size: u64,
917 ) -> Self {
918 Self {
919 raw: vk_buffer,
920 block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
921 memory,
922 offset,
923 size,
924 })),
925 }
926 }
927}
928
// Marker impl: nothing is overridden here.
impl crate::DynBuffer for Buffer {}
930
/// A Vulkan acceleration structure together with the resources stored alongside it.
#[derive(Debug)]
pub struct AccelerationStructure {
    /// Raw acceleration structure handle.
    raw: vk::AccelerationStructureKHR,
    // NOTE(review): presumably the buffer that stores the acceleration structure — confirm.
    buffer: vk::Buffer,
    // NOTE(review): presumably the allocation backing `buffer` — confirm.
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    // NOTE(review): presumably only `Some` when the compacted size may be queried — confirm.
    compacted_size_query: Option<vk::QueryPool>,
}

// Marker impl: nothing is overridden here.
impl crate::DynAccelerationStructure for AccelerationStructure {}
940
/// The Vulkan implementation of [`crate::Api::Texture`].
#[derive(Debug)]
pub struct Texture {
    /// Raw Vulkan image handle; exposed through `raw_handle`.
    raw: vk::Image,
    // NOTE(review): presumably set for images not owned by wgpu-hal — confirm.
    drop_guard: Option<crate::DropGuard>,
    // NOTE(review): presumably memory imported from outside the allocator — confirm.
    external_memory: Option<vk::DeviceMemory>,
    // NOTE(review): presumably the `gpu_alloc` allocation backing `raw`, if any — confirm.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// The texture's format, in `wgt` terms.
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Unique identity for this image.
    identity: ResourceIdentity<vk::Image>,
}

// Marker impl: nothing is overridden here.
impl crate::DynTexture for Texture {}
953
impl Texture {
    /// Returns the raw `vk::Image` handle of this texture.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}
962
/// The Vulkan implementation of [`crate::Api::TextureView`].
#[derive(Debug)]
pub struct TextureView {
    /// Raw handle of the image this view refers to.
    raw_texture: vk::Image,
    /// Raw Vulkan image view handle; exposed through `raw_handle`.
    raw: vk::ImageView,
    layers: NonZeroU32,
    /// The view's format, in `wgt` terms.
    format: wgt::TextureFormat,
    /// The view's format, in Vulkan terms.
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Unique identity of the viewed image.
    texture_identity: ResourceIdentity<vk::Image>,
    /// Unique identity of this view; returned by `identified_raw_view`.
    view_identity: ResourceIdentity<vk::ImageView>,
}

// Marker impl: nothing is overridden here.
impl crate::DynTextureView for TextureView {}
977
978impl TextureView {
979 /// # Safety
980 ///
981 /// - The image view handle must not be manually destroyed
982 pub unsafe fn raw_handle(&self) -> vk::ImageView {
983 self.raw
984 }
985
986 /// Returns the raw texture view, along with its identity.
987 fn identified_raw_view(&self) -> IdentifiedTextureView {
988 IdentifiedTextureView {
989 raw: self.raw,
990 identity: self.view_identity,
991 }
992 }
993}
994
/// A raw Vulkan sampler together with the create info stored alongside it.
#[derive(Debug)]
pub struct Sampler {
    /// Raw Vulkan sampler handle.
    raw: vk::Sampler,
    // NOTE(review): presumably the create info `raw` was created from — confirm.
    create_info: vk::SamplerCreateInfo<'static>,
}

// Marker impl: nothing is overridden here.
impl crate::DynSampler for Sampler {}
1002
/// A raw Vulkan descriptor set layout plus bookkeeping about its descriptors.
#[derive(Debug)]
pub struct BindGroupLayout {
    /// Raw Vulkan descriptor set layout handle.
    raw: vk::DescriptorSetLayout,
    // NOTE(review): presumably the per-type descriptor totals used when allocating sets — confirm.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    // NOTE(review): presumably one `(descriptor type, count)` entry per binding — confirm.
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

// Marker impl: nothing is overridden here.
impl crate::DynBindGroupLayout for BindGroupLayout {}
1013
/// A raw Vulkan pipeline layout plus the binding map stored alongside it.
#[derive(Debug)]
pub struct PipelineLayout {
    /// Raw Vulkan pipeline layout handle.
    raw: vk::PipelineLayout,
    /// Binding map in the form used by naga's SPIR-V backend.
    binding_arrays: naga::back::spv::BindingMap,
}

// Marker impl: nothing is overridden here.
impl crate::DynPipelineLayout for PipelineLayout {}
1021
/// A descriptor set allocated through `gpu_descriptor`.
#[derive(Debug)]
pub struct BindGroup {
    /// The allocated descriptor set.
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

// Marker impl: nothing is overridden here.
impl crate::DynBindGroup for BindGroup {}
1028
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
///
/// The vectors are cleared rather than dropped, so their capacity can be reused.
#[derive(Default)]
struct Temp {
    /// Scratch bytes for building nul-terminated strings (see `make_c_str`).
    marker: Vec<u8>,
    /// Scratch list of buffer memory barriers.
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    /// Scratch list of image memory barriers.
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1036
1037impl Temp {
1038 fn clear(&mut self) {
1039 self.marker.clear();
1040 self.buffer_barriers.clear();
1041 self.image_barriers.clear();
1042 }
1043
1044 fn make_c_str(&mut self, name: &str) -> &CStr {
1045 self.marker.clear();
1046 self.marker.extend_from_slice(name.as_bytes());
1047 self.marker.push(0);
1048 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1049 }
1050}
1051
/// Hands out unique IDs for resources of type `T`.
///
/// Vulkan handles are not permanently unique, so handles alone cannot tell
/// two resources apart over time. IDs produced here come from a counter
/// instead.
struct ResourceIdentityFactory<T> {
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    _phantom: PhantomData<T>,
}

impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first ID is `0`.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        // Fetch the current counter value and bump it, using whichever
        // counter representation this target was compiled with.
        #[cfg(not(target_has_atomic = "64"))]
        let id = {
            let mut counter = self.next_id.lock();
            let current = *counter;
            *counter += 1;
            current
        };

        #[cfg(target_has_atomic = "64")]
        let id = self
            .next_id
            .fetch_add(1, core::sync::atomic::Ordering::Relaxed);

        ResourceIdentity {
            id,
            _phantom: PhantomData,
        }
    }
}

/// A unique identifier for a resource of type `T`.
///
/// Intended as a hashable key for resources that stays unique for the
/// lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    id: u64,
    _phantom: PhantomData<T>,
}
1107
/// Hash-map key under which framebuffers are stored (see `CommandEncoder::framebuffers`).
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    /// The render pass that is part of this key.
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    /// The extent that is part of this key.
    extent: wgt::Extent3d,
}
1121
1122impl FramebufferKey {
1123 fn push_view(&mut self, view: IdentifiedTextureView) {
1124 self.attachment_identities.push(view.identity);
1125 self.attachment_views.push(view.raw);
1126 }
1127}
1128
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    /// Raw Vulkan image view handle.
    raw: vk::ImageView,
    /// Unique identity of the view.
    identity: ResourceIdentity<vk::ImageView>,
}
1135
/// Hash-map key for the temporary texture views kept by a `CommandEncoder`
/// (see `CommandEncoder::temp_texture_views`).
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    /// Raw handle of the viewed image.
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1147
/// The Vulkan implementation of [`crate::Api::CommandEncoder`].
pub struct CommandEncoder {
    /// The command pool owned by this encoder; destroyed on drop.
    raw: vk::CommandPool,
    /// Shared state of the device this encoder belongs to.
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Framebuffers stored by this encoder, by key; destroyed on drop.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Temporary image views stored by this encoder, by key; destroyed on drop.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    /// HAL counters; `command_encoders` is decremented on drop.
    counters: Arc<wgt::HalCounters>,
}
1189
1190impl Drop for CommandEncoder {
1191 fn drop(&mut self) {
1192 // SAFETY:
1193 //
1194 // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
1195 // `CommandBuffer` must live until its execution is complete, and that a
1196 // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
1197 // Thus, we know that none of our `CommandBuffers` are in the "pending"
1198 // state.
1199 //
1200 // The other VUIDs are pretty obvious.
1201 unsafe {
1202 // `vkDestroyCommandPool` also frees any command buffers allocated
1203 // from that pool, so there's no need to explicitly call
1204 // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
1205 // fields.
1206 self.device.raw.destroy_command_pool(self.raw, None);
1207 }
1208
1209 for (_, fb) in self.framebuffers.drain() {
1210 unsafe { self.device.raw.destroy_framebuffer(fb, None) };
1211 }
1212
1213 for (_, view) in self.temp_texture_views.drain() {
1214 unsafe { self.device.raw.destroy_image_view(view.raw, None) };
1215 }
1216
1217 self.counters.command_encoders.sub(1);
1218 }
1219}
1220
impl CommandEncoder {
    /// Returns the raw handle of the currently active command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1229
1230impl fmt::Debug for CommandEncoder {
1231 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1232 f.debug_struct("CommandEncoder")
1233 .field("raw", &self.raw)
1234 .finish()
1235 }
1236}
1237
/// The Vulkan implementation of [`crate::Api::CommandBuffer`].
#[derive(Debug)]
pub struct CommandBuffer {
    /// Raw Vulkan command buffer handle; this is what `Queue::submit` submits.
    raw: vk::CommandBuffer,
}

// Marker impl: nothing is overridden here.
impl crate::DynCommandBuffer for CommandBuffer {}
1244
/// A shader module, held either as a Vulkan object or as naga IR.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-created Vulkan shader module.
    Raw(vk::ShaderModule),
    /// A naga shader that has not been turned into a `vk::ShaderModule`.
    // NOTE(review): presumably compiled later, e.g. at pipeline creation — confirm.
    Intermediate {
        /// The naga shader.
        naga_shader: crate::NagaShader,
        /// The runtime checks requested for this shader.
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

// Marker impl: nothing is overridden here.
impl crate::DynShaderModule for ShaderModule {}
1256
/// A render pipeline, stored as a raw Vulkan pipeline handle.
#[derive(Debug)]
pub struct RenderPipeline {
    /// Raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Marker impl: nothing is overridden here.
impl crate::DynRenderPipeline for RenderPipeline {}
1263
/// A compute pipeline, stored as a raw Vulkan pipeline handle.
#[derive(Debug)]
pub struct ComputePipeline {
    /// Raw Vulkan pipeline handle.
    raw: vk::Pipeline,
}

// Marker impl: nothing is overridden here.
impl crate::DynComputePipeline for ComputePipeline {}
1270
/// A pipeline cache, stored as a raw Vulkan pipeline cache handle.
#[derive(Debug)]
pub struct PipelineCache {
    /// Raw Vulkan pipeline cache handle.
    raw: vk::PipelineCache,
}

// Marker impl: nothing is overridden here.
impl crate::DynPipelineCache for PipelineCache {}
1277
/// A query set, stored as a raw Vulkan query pool handle.
#[derive(Debug)]
pub struct QuerySet {
    /// Raw Vulkan query pool handle.
    raw: vk::QueryPool,
}

// Marker impl: nothing is overridden here.
impl crate::DynQuerySet for QuerySet {}
1284
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        /// The highest fence value known to have completed as of the last
        /// call to `maintain`.
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        /// Fences that have been reset and can be reused by a later submission.
        free: Vec<vk::Fence>,
    },
}

// Marker impl: nothing is overridden here.
impl crate::DynFence for Fence {}
1335
1336impl Fence {
1337 /// Return the highest [`FenceValue`] among the signalled fences in `active`.
1338 ///
1339 /// As an optimization, assume that we already know that the fence has
1340 /// reached `last_completed`, and don't bother checking fences whose values
1341 /// are less than that: those fences remain in the `active` array only
1342 /// because we haven't called `maintain` yet to clean them up.
1343 ///
1344 /// [`FenceValue`]: crate::FenceValue
1345 fn check_active(
1346 device: &ash::Device,
1347 mut last_completed: crate::FenceValue,
1348 active: &[(crate::FenceValue, vk::Fence)],
1349 ) -> Result<crate::FenceValue, crate::DeviceError> {
1350 for &(value, raw) in active.iter() {
1351 unsafe {
1352 if value > last_completed
1353 && device
1354 .get_fence_status(raw)
1355 .map_err(map_host_device_oom_and_lost_err)?
1356 {
1357 last_completed = value;
1358 }
1359 }
1360 }
1361 Ok(last_completed)
1362 }
1363
1364 /// Return the highest signalled [`FenceValue`] for `self`.
1365 ///
1366 /// [`FenceValue`]: crate::FenceValue
1367 fn get_latest(
1368 &self,
1369 device: &ash::Device,
1370 extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
1371 ) -> Result<crate::FenceValue, crate::DeviceError> {
1372 match *self {
1373 Self::TimelineSemaphore(raw) => unsafe {
1374 Ok(match *extension.unwrap() {
1375 ExtensionFn::Extension(ref ext) => ext
1376 .get_semaphore_counter_value(raw)
1377 .map_err(map_host_device_oom_and_lost_err)?,
1378 ExtensionFn::Promoted => device
1379 .get_semaphore_counter_value(raw)
1380 .map_err(map_host_device_oom_and_lost_err)?,
1381 })
1382 },
1383 Self::FencePool {
1384 last_completed,
1385 ref active,
1386 free: _,
1387 } => Self::check_active(device, last_completed, active),
1388 }
1389 }
1390
1391 /// Trim the internal state of this [`Fence`].
1392 ///
1393 /// This function has no externally visible effect, but you should call it
1394 /// periodically to keep this fence's resource consumption under control.
1395 ///
1396 /// For fences using the [`FencePool`] implementation, this function
1397 /// recycles fences that have been signaled. If you don't call this,
1398 /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
1399 /// time it's called.
1400 ///
1401 /// [`FencePool`]: Fence::FencePool
1402 /// [`Queue::submit`]: crate::Queue::submit
1403 fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
1404 match *self {
1405 Self::TimelineSemaphore(_) => {}
1406 Self::FencePool {
1407 ref mut last_completed,
1408 ref mut active,
1409 ref mut free,
1410 } => {
1411 let latest = Self::check_active(device, *last_completed, active)?;
1412 let base_free = free.len();
1413 for &(value, raw) in active.iter() {
1414 if value <= latest {
1415 free.push(raw);
1416 }
1417 }
1418 if free.len() != base_free {
1419 active.retain(|&(value, _)| value > latest);
1420 unsafe { device.reset_fences(&free[base_free..]) }
1421 .map_err(map_device_oom_err)?
1422 }
1423 *last_completed = latest;
1424 }
1425 }
1426 Ok(())
1427 }
1428}
1429
impl crate::Queue for Queue {
    type A = Api;

    /// Submits `command_buffers` to the queue with a single `vkQueueSubmit` call.
    ///
    /// The submission waits on:
    /// - the acquire semaphore of each surface texture that provides one, and
    /// - the relay semaphore signalled by the previous submission, if any.
    ///
    /// The submission signals:
    /// - one present semaphore per surface texture,
    /// - any semaphores registered through `add_signal_semaphore`,
    /// - the relay semaphore for the next submission, and
    /// - `signal_fence`, either as a timeline semaphore value or through a
    ///   pooled `vk::Fence` passed to `vkQueueSubmit`.
    ///
    /// # Panics
    ///
    /// Panics if the acquire or present semaphores of a surface texture are
    /// already locked.
    ///
    /// # Errors
    ///
    /// Returns an error if creating a semaphore or fence, maintaining the
    /// fence, or the submission itself fails.
    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // Stays null unless `signal_fence` is a `FencePool`.
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        // Lock every surface texture's semaphores up front; `try_lock` makes
        // contention a panic instead of a blocking wait.
        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Pick up the semaphores registered through `add_signal_semaphore`.
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if there is one, otherwise create a new one.
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage handed to `add_to_submit` for the timeline submit info.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    /// Presents `texture` on `surface`'s swapchain.
    ///
    /// The present waits on the semaphores returned by the texture's present
    /// semaphore state. The acquire and present semaphore state is reset
    /// before the present call is made.
    ///
    /// # Panics
    ///
    /// Panics if `surface` currently has no swapchain.
    ///
    /// # Errors
    ///
    /// - `SurfaceError::Outdated` for `VK_ERROR_OUT_OF_DATE_KHR`
    /// - `SurfaceError::Lost` for `VK_ERROR_SURFACE_LOST_KHR`
    /// - any other failure is mapped through `map_host_device_oom_and_lost_err`
    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        // Declared outside the `if` so that they outlive `vk_info`, which
        // borrows them when a present time is set.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    /// Returns the timestamp period stored on the shared device state.
    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1631
1632impl Queue {
1633 pub fn raw_device(&self) -> &ash::Device {
1634 &self.device.raw
1635 }
1636
1637 pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
1638 let mut guard = self.signal_semaphores.lock();
1639 if let Some(value) = semaphore_value {
1640 guard.push_timeline(semaphore, value);
1641 } else {
1642 guard.push_binary(semaphore);
1643 }
1644 }
1645}
1646
1647/// Maps
1648///
1649/// - VK_ERROR_OUT_OF_HOST_MEMORY
1650/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1651fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1652 match err {
1653 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1654 get_oom_err(err)
1655 }
1656 e => get_unexpected_err(e),
1657 }
1658}
1659
1660/// Maps
1661///
1662/// - VK_ERROR_OUT_OF_HOST_MEMORY
1663/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1664/// - VK_ERROR_DEVICE_LOST
1665fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1666 match err {
1667 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1668 other => map_host_device_oom_err(other),
1669 }
1670}
1671
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body);
/// this simply forwards to `map_host_device_oom_err`.
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_device_oom_err(err)
}
1682
1683/// Maps
1684///
1685/// - VK_ERROR_OUT_OF_HOST_MEMORY
1686fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1687 match err {
1688 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1689 e => get_unexpected_err(e),
1690 }
1691}
1692
1693/// Maps
1694///
1695/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1696fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1697 match err {
1698 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1699 e => get_unexpected_err(e),
1700 }
1701}
1702
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
///
/// The last code gets no dedicated handling (see the comment in the body);
/// this simply forwards to `map_host_oom_err`.
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    map_host_oom_err(err)
}
1712
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
///
/// The last two codes get no dedicated handling (see the comments in the
/// body); this simply forwards to `map_host_device_oom_err`.
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    map_host_device_oom_err(err)
}
1726
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
///
/// `_err` is only used in the panic message.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // With the feature enabled, the `panic!` above makes this line unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1736
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// `_err` is ignored.
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1741
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // With the feature enabled, the `panic!` above makes this line unreachable.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1751
/// Plain-data record describing one top-level acceleration structure instance.
///
/// With `#[repr(C)]` the fields occupy 64 bytes with no padding, which is
/// what allows the `Pod` derive.
// NOTE(review): presumably mirrors `VkAccelerationStructureInstanceKHR` — confirm.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    // NOTE(review): presumably a 3x4 row-major transform matrix — confirm.
    transform: [f32; 12],
    /// Custom data and mask, packed into one `u32`.
    custom_data_and_mask: u32,
    /// Shader binding table record offset and flags, packed into one `u32`.
    shader_binding_table_record_offset_and_flags: u32,
    /// Reference to the acceleration structure being instanced.
    acceleration_structure_reference: u64,
}
1760
/// Arguments to the [`CreateDeviceCallback`].
///
/// Every public field is a mutable reference, so changes made by the callback
/// are visible to the code that invoked it.
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1782
/// Callback to allow changing the Vulkan device creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1792
/// Arguments to the [`CreateInstanceCallback`].
///
/// `extensions` and `create_info` are mutable references, so changes made by
/// the callback are visible to the code that invoked it.
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`.
    _phantom: PhantomData<&'this ()>,
}
1812
/// Callback to allow changing the Vulkan instance creation parameters.
///
/// # Safety
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not the create info,
///   as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;