wgpu_hal/vulkan/mod.rs
1/*!
2# Vulkan API internals.
3
4## Stack memory
5
6Ash expects slices, which we don't generally have available.
7We cope with this requirement by the combination of the following ways:
8 - temporarily allocating `Vec` on heap, where overhead is permitted
9 - growing temporary local storage
10
11## Framebuffers and Render passes
12
13Render passes are cached on the device and kept forever.
14
Framebuffers are also cached on the device, and a cached framebuffer is
evicted whenever any of the image views it refers to is removed.
17If Vulkan supports image-less framebuffers,
18then the actual views are excluded from the framebuffer key.
19
20## Fences
21
22If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
23Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.
24
25!*/
26
27mod adapter;
28mod command;
29mod conv;
30mod device;
31mod drm;
32mod instance;
33mod sampler;
34mod semaphore_list;
35
36pub use adapter::PhysicalDeviceFeatures;
37
38use alloc::{boxed::Box, ffi::CString, sync::Arc, vec::Vec};
39use core::{borrow::Borrow, ffi::CStr, fmt, marker::PhantomData, mem, num::NonZeroU32};
40
41use arrayvec::ArrayVec;
42use ash::{ext, khr, vk};
43use bytemuck::{Pod, Zeroable};
44use hashbrown::HashSet;
45use parking_lot::{Mutex, RwLock};
46
47use naga::FastHashMap;
48use wgt::InternalCounter;
49
50use semaphore_list::SemaphoreList;
51
/// Upper bound on attachments in a render pass: each color attachment may
/// carry a resolve attachment (hence `* 2`), plus one depth/stencil attachment.
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;
53
/// The Vulkan backend's [`crate::Api`] implementation, mapping each
/// wgpu-hal resource type to its Vulkan counterpart defined in this module.
#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    const VARIANT: wgt::Backend = wgt::Backend::Vulkan;

    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;
    type PipelineCache = PipelineCache;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}
86
// Generate the dynamic-dispatch (`Dyn*`) trait boilerplate for every
// Vulkan resource type in this backend.
crate::impl_dyn_resource!(
    Adapter,
    AccelerationStructure,
    BindGroup,
    BindGroupLayout,
    Buffer,
    CommandBuffer,
    CommandEncoder,
    ComputePipeline,
    Device,
    Fence,
    Instance,
    PipelineCache,
    PipelineLayout,
    QuerySet,
    Queue,
    RenderPipeline,
    Sampler,
    ShaderModule,
    Surface,
    SurfaceTexture,
    Texture,
    TextureView
);
111
/// State for an active `VK_EXT_debug_utils` messenger.
struct DebugUtils {
    /// Loaded entry points for the `VK_EXT_debug_utils` instance extension.
    extension: ext::debug_utils::Instance,
    /// The registered debug messenger; destroyed by `InstanceShared::drop`.
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Parameters for creating the debug-utils messenger at instance creation.
pub struct DebugUtilsCreateInfo {
    /// Message severities the callback should receive.
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    /// Message types the callback should receive.
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    /// User data handed to the callback; see [`DebugUtilsMessengerUserData`].
    callback_data: Box<DebugUtilsMessengerUserData>,
}

#[derive(Debug)]
/// The properties related to the validation layer needed for the
/// DebugUtilsMessenger for their workarounds
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// If the OBS layer is present. OBS never increments the version of their layer,
    /// so there's no reason to have the version.
    has_obs_layer: bool,
}
155
/// Instance-level state shared between the [`Instance`], [`Adapter`]s and devices.
pub struct InstanceShared {
    /// The raw `ash` instance handle.
    raw: ash::Instance,
    /// Enabled instance extensions.
    extensions: Vec<&'static CStr>,
    /// NOTE(review): presumably suppresses raw-instance destruction when the
    /// instance is owned externally (cf. [`DeviceShared`]'s `Drop`) — confirm.
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    memory_budget_thresholds: wgt::MemoryBudgetThresholds,
    /// Debug messenger state, if `VK_EXT_debug_utils` is active.
    debug_utils: Option<DebugUtils>,
    /// Entry points for `VK_KHR_get_physical_device_properties2`, if available.
    get_physical_device_properties: Option<khr::get_physical_device_properties2::Instance>,
    entry: ash::Entry,
    /// NOTE(review): looks like a flag for the NV Optimus layer/driver being
    /// detected — set at instance creation; verify in the `instance` module.
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version.
    ///
    /// Which is the version of Vulkan supported for instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

/// Entry point of the Vulkan backend; wraps the shared instance state.
pub struct Instance {
    shared: Arc<InstanceShared>,
}
179
/// Semaphore used to acquire a swapchain image.
///
/// See [`Swapchain::acquire_semaphores`] for how these are pooled and recycled.
#[derive(Debug)]
struct SwapchainAcquireSemaphore {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// Additionally, semaphores can only be waited on once, so we need to ensure
    /// that we only actually pass this semaphore to the first submission that
    /// uses that image.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainAcquireSemaphore::acquire
    previously_used_submission_index: crate::FenceValue,
}
228
229impl SwapchainAcquireSemaphore {
230 fn new(device: &DeviceShared, index: usize) -> Result<Self, crate::DeviceError> {
231 Ok(Self {
232 acquire: device
233 .new_binary_semaphore(&format!("SwapchainImageSemaphore: Index {index} acquire"))?,
234 should_wait_for_acquire: true,
235 previously_used_submission_index: 0,
236 })
237 }
238
239 /// Sets the fence value which the next acquire will wait for. This prevents
240 /// the semaphore from being used while the previous submission is still in flight.
241 fn set_used_fence_value(&mut self, value: crate::FenceValue) {
242 self.previously_used_submission_index = value;
243 }
244
245 /// Return the semaphore that commands drawing to this image should wait for, if any.
246 ///
247 /// This only returns `Some` once per acquisition; see
248 /// [`SwapchainAcquireSemaphore::should_wait_for_acquire`] for details.
249 fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
250 if self.should_wait_for_acquire {
251 self.should_wait_for_acquire = false;
252 Some(self.acquire)
253 } else {
254 None
255 }
256 }
257
258 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
259 /// so reset internal state to be ready for the next frame.
260 fn end_semaphore_usage(&mut self) {
261 // Reset the acquire semaphore, so that the next time we acquire this
262 // image, we can wait for it again.
263 self.should_wait_for_acquire = true;
264 }
265
266 unsafe fn destroy(&self, device: &ash::Device) {
267 unsafe {
268 device.destroy_semaphore(self.acquire, None);
269 }
270 }
271}
272
#[derive(Debug)]
struct SwapchainPresentSemaphores {
    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainPresentSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// Reset to zero by [`SwapchainPresentSemaphores::end_semaphore_usage`].
    ///
    /// [`present`]: SwapchainPresentSemaphores::present
    present_index: usize,

    /// Which image this semaphore set is used for.
    frame_index: usize,
}
319
320impl SwapchainPresentSemaphores {
321 pub fn new(frame_index: usize) -> Self {
322 Self {
323 present: Vec::new(),
324 present_index: 0,
325 frame_index,
326 }
327 }
328
329 /// Return the semaphore that the next submission that writes to this image should
330 /// signal when it's done.
331 ///
332 /// See [`SwapchainPresentSemaphores::present`] for details.
333 fn get_submit_signal_semaphore(
334 &mut self,
335 device: &DeviceShared,
336 ) -> Result<vk::Semaphore, crate::DeviceError> {
337 // Try to recycle a semaphore we created for a previous presentation.
338 let sem = match self.present.get(self.present_index) {
339 Some(sem) => *sem,
340 None => {
341 let sem = device.new_binary_semaphore(&format!(
342 "SwapchainImageSemaphore: Image {} present semaphore {}",
343 self.frame_index, self.present_index
344 ))?;
345 self.present.push(sem);
346 sem
347 }
348 };
349
350 self.present_index += 1;
351
352 Ok(sem)
353 }
354
355 /// Indicates the cpu-side usage of this semaphore has finished for the frame,
356 /// so reset internal state to be ready for the next frame.
357 fn end_semaphore_usage(&mut self) {
358 // Reset the index to 0, so that the next time we get a semaphore, we
359 // start from the beginning of the list.
360 self.present_index = 0;
361 }
362
363 /// Return the semaphores that a presentation of this image should wait on.
364 ///
365 /// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
366 /// ends this image's acquisition should wait for. See
367 /// [`SwapchainPresentSemaphores::present`] for details.
368 ///
369 /// Reset `self` to be ready for the next acquisition cycle.
370 ///
371 /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
372 fn get_present_wait_semaphores(&mut self) -> Vec<vk::Semaphore> {
373 self.present[0..self.present_index].to_vec()
374 }
375
376 unsafe fn destroy(&self, device: &ash::Device) {
377 unsafe {
378 for sem in &self.present {
379 device.destroy_semaphore(*sem, None);
380 }
381 }
382 }
383}
384
struct Swapchain {
    /// The raw swapchain handle.
    raw: vk::SwapchainKHR,
    /// Entry points for the `VK_KHR_swapchain` device extension.
    functor: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    /// The swapchain's images.
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,

    /// Semaphores used between image acquisition and the first submission
    /// that uses that image. This is indexed using [`next_acquire_index`].
    ///
    /// Because we need to provide this to [`vkAcquireNextImageKHR`], we haven't
    /// received the swapchain image index for the frame yet, so we cannot use
    /// that to index it.
    ///
    /// Before we pass this to [`vkAcquireNextImageKHR`], we ensure that we wait on
    /// the submission indicated by [`previously_used_submission_index`]. This ensures
    /// the semaphore is no longer in use before we use it.
    ///
    /// [`next_acquire_index`]: Swapchain::next_acquire_index
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    /// [`previously_used_submission_index`]: SwapchainAcquireSemaphore::previously_used_submission_index
    acquire_semaphores: Vec<Arc<Mutex<SwapchainAcquireSemaphore>>>,
    /// The index of the next acquire semaphore to use.
    ///
    /// This is incremented each time we acquire a new image, and wraps around
    /// to 0 when it reaches the end of [`acquire_semaphores`].
    ///
    /// [`acquire_semaphores`]: Swapchain::acquire_semaphores
    next_acquire_index: usize,

    /// Semaphore sets used between all submissions that write to an image and
    /// the presentation of that image.
    ///
    /// This is indexed by the swapchain image index returned by
    /// [`vkAcquireNextImageKHR`].
    ///
    /// We know it is safe to use these semaphores because we use them
    /// _after_ the acquire semaphore. Because the acquire semaphore
    /// has been signaled, the previous presentation using that image
    /// is known-finished, so this semaphore is no longer in use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    present_semaphores: Vec<Arc<Mutex<SwapchainPresentSemaphores>>>,

    /// The present timing information which will be set in the next call to [`present()`](crate::Queue::present()).
    ///
    /// # Safety
    ///
    /// This must only be set if [`wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING`] is enabled, and
    /// so the VK_GOOGLE_display_timing extension is present.
    next_present_time: Option<vk::PresentTimeGOOGLE>,
}
437
438impl Swapchain {
439 /// Mark the current frame finished, advancing to the next acquire semaphore.
440 fn advance_acquire_semaphore(&mut self) {
441 let semaphore_count = self.acquire_semaphores.len();
442 self.next_acquire_index = (self.next_acquire_index + 1) % semaphore_count;
443 }
444
445 /// Get the next acquire semaphore that should be used with this swapchain.
446 fn get_acquire_semaphore(&self) -> Arc<Mutex<SwapchainAcquireSemaphore>> {
447 self.acquire_semaphores[self.next_acquire_index].clone()
448 }
449
450 /// Get the set of present semaphores that should be used with the given image index.
451 fn get_present_semaphores(&self, index: u32) -> Arc<Mutex<SwapchainPresentSemaphores>> {
452 self.present_semaphores[index as usize].clone()
453 }
454}
455
pub struct Surface {
    /// The raw `VkSurfaceKHR` handle.
    raw: vk::SurfaceKHR,
    /// Entry points for the `VK_KHR_surface` instance extension.
    functor: khr::surface::Instance,
    instance: Arc<InstanceShared>,
    /// The active swapchain; `None` until the surface is configured.
    swapchain: RwLock<Option<Swapchain>>,
}
462
463impl Surface {
464 pub unsafe fn raw_handle(&self) -> vk::SurfaceKHR {
465 self.raw
466 }
467
468 /// Get the raw Vulkan swapchain associated with this surface.
469 ///
470 /// Returns [`None`] if the surface is not configured.
471 pub fn raw_swapchain(&self) -> Option<vk::SwapchainKHR> {
472 let read = self.swapchain.read();
473 read.as_ref().map(|it| it.raw)
474 }
475
476 /// Set the present timing information which will be used for the next [presentation](crate::Queue::present()) of this surface,
477 /// using [VK_GOOGLE_display_timing].
478 ///
479 /// This can be used to give an id to presentations, for future use of [`vk::PastPresentationTimingGOOGLE`].
480 /// Note that `wgpu-hal` does *not* provide a way to use that API - you should manually access this through [`ash`].
481 ///
482 /// This can also be used to add a "not before" timestamp to the presentation.
483 ///
484 /// The exact semantics of the fields are also documented in the [specification](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentTimeGOOGLE.html) for the extension.
485 ///
486 /// # Panics
487 ///
488 /// - If the surface hasn't been configured.
489 /// - If the device doesn't [support present timing](wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING).
490 ///
491 /// [VK_GOOGLE_display_timing]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_GOOGLE_display_timing.html
492 #[track_caller]
493 pub fn set_next_present_time(&self, present_timing: vk::PresentTimeGOOGLE) {
494 let mut swapchain = self.swapchain.write();
495 let swapchain = swapchain
496 .as_mut()
497 .expect("Surface should have been configured");
498 let features = wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING;
499 if swapchain.device.features.contains(features) {
500 swapchain.next_present_time = Some(present_timing);
501 } else {
502 // Ideally we'd use something like `device.required_features` here, but that's in `wgpu-core`, which we are a dependency of
503 panic!(
504 concat!(
505 "Tried to set display timing properties ",
506 "without the corresponding feature ({:?}) enabled."
507 ),
508 features
509 );
510 }
511 }
512}
513
/// A swapchain image handed out by acquisition, together with the
/// per-image semaphore state it participates in.
#[derive(Debug)]
pub struct SurfaceTexture {
    /// Swapchain image index this texture corresponds to.
    index: u32,
    texture: Texture,
    /// Acquire semaphore shared with the owning [`Swapchain`].
    acquire_semaphores: Arc<Mutex<SwapchainAcquireSemaphore>>,
    /// Present semaphore pool for this image; see [`SwapchainPresentSemaphores`].
    present_semaphores: Arc<Mutex<SwapchainPresentSemaphores>>,
}

impl crate::DynSurfaceTexture for SurfaceTexture {}

// Allow a `SurfaceTexture` to be used wherever a plain `Texture` is expected.
impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

impl Borrow<dyn crate::DynTexture> for SurfaceTexture {
    fn borrow(&self) -> &dyn crate::DynTexture {
        &self.texture
    }
}
535
/// A Vulkan physical device, plus the capability information gathered for it.
pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    phd_features: PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO there's no reason why this can't be unified--the function pointers should all be the same--it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

/// Function pointers for optional device extensions; `None` when the
/// corresponding extension is unavailable.
struct DeviceExtensionFunctions {
    debug_utils: Option<ext::debug_utils::Device>,
    draw_indirect_count: Option<khr::draw_indirect_count::Device>,
    timeline_semaphore: Option<ExtensionFn<khr::timeline_semaphore::Device>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
    mesh_shading: Option<ext::mesh_shader::Device>,
}

/// Function pointers for the extensions required by ray tracing.
struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::acceleration_structure::Device,
    buffer_device_address: khr::buffer_device_address::Device,
}
568
/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// NOTE(review): presumably gates creating image views with a usage subset
    /// (`VkImageViewUsageCreateInfo`) — confirm at the use site.
    image_view_usage: bool,
    /// True if timeline semaphores can be used; see the module docs on fences.
    timeline_semaphores: bool,
    // Depth/stencil format availability flags. The exact `VkFormat`s they
    // correspond to are determined outside this module.
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around broken platform configurations.
    can_present: bool,
    /// NOTE(review): presumably an alignment mask for non-coherent mapped
    /// memory ranges (`nonCoherentAtomSize - 1`) — confirm.
    non_coherent_map_mask: wgt::BufferAddress,
    multi_draw_indirect: bool,

    /// True if this adapter advertises the [`robustBufferAccess`][vrba] feature.
    ///
    /// Note that Vulkan's `robustBufferAccess` is not sufficient to implement
    /// `wgpu_hal`'s guarantee that shaders will not access buffer contents via
    /// a given bindgroup binding outside that binding's [accessible
    /// region][ar]. Enabling `robustBufferAccess` does ensure that
    /// out-of-bounds reads and writes are not undefined behavior (that's good),
    /// but still permits out-of-bounds reads to return data from anywhere
    /// within the buffer, not just the accessible region.
    ///
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    /// [vrba]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#features-robustBufferAccess
    robust_buffer_access: bool,

    /// NOTE(review): image counterpart of [`Self::robust_buffer_access`] —
    /// confirm which feature bit this mirrors.
    robust_image_access: bool,

    /// True if this adapter supports the [`VK_EXT_robustness2`] extension's
    /// [`robustBufferAccess2`] feature.
    ///
    /// This is sufficient to implement `wgpu_hal`'s [required bounds-checking][ar] of
    /// shader accesses to buffer contents. If this feature is not available,
    /// this backend must have Naga inject bounds checks in the generated
    /// SPIR-V.
    ///
    /// [`VK_EXT_robustness2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_robustness2.html
    /// [`robustBufferAccess2`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceRobustness2FeaturesEXT.html#features-robustBufferAccess2
    /// [ar]: ../struct.BufferBinding.html#accessible-region
    robust_buffer_access2: bool,

    /// NOTE(review): image counterpart of [`Self::robust_buffer_access2`].
    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    /// NOTE(review): presumably `VK_KHR_image_format_list` support — confirm.
    image_format_list: bool,
    /// Maximum number of samplers; used to bound sampler allocation
    /// (NOTE(review): confirm against the sampler cache in `sampler.rs`).
    maximum_samplers: u32,

    /// True if this adapter supports the [`VK_KHR_shader_integer_dot_product`] extension
    /// (promoted to Vulkan 1.3).
    ///
    /// This is used to generate optimized code for WGSL's `dot4{I, U}8Packed`.
    ///
    /// [`VK_KHR_shader_integer_dot_product`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_shader_integer_dot_product.html
    shader_integer_dot_product: bool,

    /// True if this adapter supports 8-bit integers provided by the
    /// [`VK_KHR_shader_float16_int8`] extension (promoted to Vulkan 1.2).
    ///
    /// Allows shaders to declare the "Int8" capability. Note, however, that this
    /// feature alone allows the use of 8-bit integers "only in the `Private`,
    /// `Workgroup` (for non-Block variables), and `Function` storage classes"
    /// ([see spec]). To use 8-bit integers in the interface storage classes (e.g.,
    /// `StorageBuffer`), you also need to enable the corresponding feature in
    /// `VkPhysicalDevice8BitStorageFeatures` and declare the corresponding SPIR-V
    /// capability (e.g., `StorageBuffer8BitAccess`).
    ///
    /// [`VK_KHR_shader_float16_int8`]: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_KHR_shader_float16_int8.html
    /// [see spec]: https://registry.khronos.org/vulkan/specs/latest/man/html/VkPhysicalDeviceShaderFloat16Int8Features.html#extension-features-shaderInt8
    shader_int8: bool,
}
640
bitflags::bitflags!(
    /// Workaround flags for driver-specific bugs.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then NVIDIA will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///   # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);
674
/// Cache key describing a single render-pass attachment; part of [`RenderPassKey`].
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}
681
682impl AttachmentKey {
683 /// Returns an attachment key for a compatible attachment.
684 fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
685 Self {
686 format,
687 layout,
688 ops: crate::AttachmentOps::all(),
689 }
690 }
691}
692
/// Cache key for a color attachment together with its optional resolve target.
#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

/// Cache key for a depth/stencil attachment; the depth ops live in `base.ops`,
/// the stencil ops are tracked separately.
#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

/// Key identifying a cached render pass in [`DeviceShared::render_passes`].
#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}
712
/// Device-level state shared between [`Device`], [`Queue`], and resources.
struct DeviceShared {
    /// The raw `ash` device; destroyed in `Drop` unless `drop_guard` is set.
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    /// Raw handle of the device's queue.
    raw_queue: vk::Queue,
    /// If present, the raw device is owned externally and is not destroyed
    /// by our `Drop` impl.
    drop_guard: Option<crate::DropGuard>,
    instance: Arc<InstanceShared>,
    physical_device: vk::PhysicalDevice,
    /// Device extensions enabled at creation.
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    pipeline_cache_validation_key: [u8; 16],
    /// NOTE(review): presumably `VkPhysicalDeviceLimits::timestampPeriod`
    /// (nanoseconds per timestamp tick) — confirm where it's filled in.
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    /// Cache of render passes, kept for the device's lifetime (see the module
    /// docs); destroyed in `Drop`.
    render_passes: Mutex<FastHashMap<RenderPassKey, vk::RenderPass>>,
    sampler_cache: Mutex<sampler::SamplerCache>,
    memory_allocations_counter: InternalCounter,

    /// Because we have cached framebuffers which are not deleted from until
    /// the device is destroyed, if the implementation of vulkan re-uses handles
    /// we need some way to differentiate between the old handle and the new handle.
    /// This factory allows us to have a dedicated identity value for each texture.
    texture_identity_factory: ResourceIdentityFactory<vk::Image>,
    /// As above, for texture views.
    texture_view_identity_factory: ResourceIdentityFactory<vk::ImageView>,
}
741
742impl Drop for DeviceShared {
743 fn drop(&mut self) {
744 for &raw in self.render_passes.lock().values() {
745 unsafe { self.raw.destroy_render_pass(raw, None) };
746 }
747 if self.drop_guard.is_none() {
748 unsafe { self.raw.destroy_device(None) };
749 }
750 }
751}
752
/// The Vulkan backend's device, owning the allocators built on top of
/// the shared raw device.
pub struct Device {
    shared: Arc<DeviceShared>,
    /// Device-memory allocator; cleaned up in `Drop`.
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    /// Descriptor-set allocator; cleaned up in `Drop`.
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
    counters: Arc<wgt::HalCounters>,
}
764
765impl Drop for Device {
766 fn drop(&mut self) {
767 unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
768 unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
769 }
770}
771
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// `Clone` is derived so [`RelaySemaphores::advance`] can snapshot the current
/// state before rotating it.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}
805
806impl RelaySemaphores {
807 fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
808 Ok(Self {
809 wait: None,
810 signal: device.new_binary_semaphore("RelaySemaphores: 1")?,
811 })
812 }
813
814 /// Advances the semaphores, returning the semaphores that should be used for a submission.
815 fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
816 let old = self.clone();
817
818 // Build the state for the next submission.
819 match self.wait {
820 None => {
821 // The `old` values describe the first submission to this queue.
822 // The second submission should wait on `old.signal`, and then
823 // signal a new semaphore which we'll create now.
824 self.wait = Some(old.signal);
825 self.signal = device.new_binary_semaphore("RelaySemaphores: 2")?;
826 }
827 Some(ref mut wait) => {
828 // What this submission signals, the next should wait.
829 mem::swap(wait, &mut self.signal);
830 }
831 };
832
833 Ok(old)
834 }
835
836 /// Destroys the semaphores.
837 unsafe fn destroy(&self, device: &ash::Device) {
838 unsafe {
839 if let Some(wait) = self.wait {
840 device.destroy_semaphore(wait, None);
841 }
842 device.destroy_semaphore(self.signal, None);
843 }
844 }
845}
846
/// The Vulkan backend's queue, wrapping the raw queue of [`DeviceShared`].
pub struct Queue {
    raw: vk::Queue,
    /// Entry points for the `VK_KHR_swapchain` device extension.
    swapchain_fn: khr::swapchain::Device,
    device: Arc<DeviceShared>,
    family_index: u32,
    /// Inter-submission ordering semaphores; see [`RelaySemaphores`].
    relay_semaphores: Mutex<RelaySemaphores>,
    signal_semaphores: Mutex<SemaphoreList>,
}
855
856impl Queue {
857 pub fn as_raw(&self) -> vk::Queue {
858 self.raw
859 }
860}
861
862impl Drop for Queue {
863 fn drop(&mut self) {
864 unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
865 }
866}
/// Describes where a [`Buffer`]'s device memory comes from.
#[derive(Debug)]
enum BufferMemoryBacking {
    /// A block allocated by our `gpu_alloc` allocator.
    Managed(gpu_alloc::MemoryBlock<vk::DeviceMemory>),
    /// A caller-imported range of raw Vulkan device memory;
    /// see [`Buffer::from_raw_managed`].
    VulkanMemory {
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    },
}
876impl BufferMemoryBacking {
877 fn memory(&self) -> &vk::DeviceMemory {
878 match self {
879 Self::Managed(m) => m.memory(),
880 Self::VulkanMemory { memory, .. } => memory,
881 }
882 }
883 fn offset(&self) -> u64 {
884 match self {
885 Self::Managed(m) => m.offset(),
886 Self::VulkanMemory { offset, .. } => *offset,
887 }
888 }
889 fn size(&self) -> u64 {
890 match self {
891 Self::Managed(m) => m.size(),
892 Self::VulkanMemory { size, .. } => *size,
893 }
894 }
895}
#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    /// Backing memory; `None` when the buffer was imported via
    /// [`Buffer::from_raw`] and its memory is entirely caller-managed.
    block: Option<Mutex<BufferMemoryBacking>>,
}
impl Buffer {
    /// Wraps a raw buffer handle without taking ownership of its memory.
    ///
    /// # Safety
    ///
    /// - `vk_buffer`'s memory must be managed by the caller
    /// - Externally imported buffers can't be mapped by `wgpu`
    pub unsafe fn from_raw(vk_buffer: vk::Buffer) -> Self {
        Self {
            raw: vk_buffer,
            block: None,
        }
    }
    /// Wraps a raw buffer handle together with the memory range backing it.
    ///
    /// # Safety
    /// - We will use this buffer and the buffer's backing memory range as if we have exclusive ownership over it, until the wgpu resource is dropped and the wgpu-hal object is cleaned up
    /// - Externally imported buffers can't be mapped by `wgpu`
    /// - `offset` and `size` must be valid with the allocation of `memory`
    pub unsafe fn from_raw_managed(
        vk_buffer: vk::Buffer,
        memory: vk::DeviceMemory,
        offset: u64,
        size: u64,
    ) -> Self {
        Self {
            raw: vk_buffer,
            block: Some(Mutex::new(BufferMemoryBacking::VulkanMemory {
                memory,
                offset,
                size,
            })),
        }
    }
}

impl crate::DynBuffer for Buffer {}
934
/// A ray-tracing acceleration structure with the buffer and memory backing it.
#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    /// Optional query pool for reading back the compacted size.
    compacted_size_query: Option<vk::QueryPool>,
}

impl crate::DynAccelerationStructure for AccelerationStructure {}
944
#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    // NOTE(review): presumably present when the image is externally owned,
    // suppressing destruction — confirm where `Texture`s are created.
    drop_guard: Option<crate::DropGuard>,
    /// Memory imported from an external source, if any.
    external_memory: Option<vk::DeviceMemory>,
    /// Memory from our allocator; `None` when we don't own the memory.
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    format: wgt::TextureFormat,
    copy_size: crate::CopyExtent,
    /// Process-unique identity for this image, used as a hash-map key
    /// (raw Vulkan handles may be reused after destruction).
    identity: ResourceIdentity<vk::Image>,
}

impl crate::DynTexture for Texture {}

impl Texture {
    /// Returns the raw Vulkan image handle.
    ///
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }

    /// Returns the imported external memory, if any.
    ///
    /// # Safety
    ///
    /// - The external memory must not be manually freed
    pub unsafe fn external_memory(&self) -> Option<vk::DeviceMemory> {
        self.external_memory
    }
}
973
#[derive(Debug)]
pub struct TextureView {
    /// The image this view was created from.
    raw_texture: vk::Image,
    raw: vk::ImageView,
    /// Number of array layers covered by this view.
    layers: NonZeroU32,
    format: wgt::TextureFormat,
    raw_format: vk::Format,
    base_mip_level: u32,
    dimension: wgt::TextureViewDimension,
    /// Identity of the parent image; see [`ResourceIdentity`].
    texture_identity: ResourceIdentity<vk::Image>,
    /// Identity of the view itself, used in framebuffer cache keys.
    view_identity: ResourceIdentity<vk::ImageView>,
}

impl crate::DynTextureView for TextureView {}

impl TextureView {
    /// Returns the raw Vulkan image view handle.
    ///
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }

    /// Returns the raw texture view, along with its identity.
    fn identified_raw_view(&self) -> IdentifiedTextureView {
        IdentifiedTextureView {
            raw: self.raw,
            identity: self.view_identity,
        }
    }
}
1005
#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
    /// The `VkSamplerCreateInfo` this sampler was created with.
    // NOTE(review): presumably retained for sampler deduplication in
    // `sampler.rs` — confirm.
    create_info: vk::SamplerCreateInfo<'static>,
}

impl crate::DynSampler for Sampler {}
1013
/// Information about a binding within a specific BindGroupLayout / BindGroup.
/// This will be used to construct a [`naga::back::spv::BindingInfo`], where
/// the descriptor set value will be taken from the index of the group.
#[derive(Copy, Clone, Debug)]
struct BindingInfo {
    /// The (remapped) binding index.
    binding: u32,
    /// Size of the binding array, if this binding is an array.
    binding_array_size: Option<NonZeroU32>,
}
1022
#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    /// Total descriptor counts for this layout, per `gpu_descriptor`.
    desc_count: gpu_descriptor::DescriptorTotalCount,
    /// Sorted list of entries.
    entries: Box<[wgt::BindGroupLayoutEntry]>,
    /// Map of original binding index to remapped binding index and optional
    /// array size.
    binding_map: Vec<(u32, BindingInfo)>,
    /// Whether any entry in this layout is a binding array.
    contains_binding_arrays: bool,
}

impl crate::DynBindGroupLayout for BindGroupLayout {}
1036
#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    /// Combined binding remapping across all bind group layouts, in the form
    /// consumed by naga's SPIR-V backend.
    binding_map: naga::back::spv::BindingMap,
}

impl crate::DynPipelineLayout for PipelineLayout {}
1044
/// A descriptor set allocated through `gpu_descriptor`.
#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

impl crate::DynBindGroup for BindGroup {}
1051
/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    /// Scratch buffer for building NUL-terminated debug-marker strings;
    /// see [`Temp::make_c_str`].
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier<'static>>,
    image_barriers: Vec<vk::ImageMemoryBarrier<'static>>,
}
1059
1060impl Temp {
1061 fn clear(&mut self) {
1062 self.marker.clear();
1063 self.buffer_barriers.clear();
1064 self.image_barriers.clear();
1065 }
1066
1067 fn make_c_str(&mut self, name: &str) -> &CStr {
1068 self.marker.clear();
1069 self.marker.extend_from_slice(name.as_bytes());
1070 self.marker.push(0);
1071 unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
1072 }
1073}
1074
/// Generates unique IDs for each resource of type `T`.
///
/// Because vk handles are not permanently unique, this
/// provides a way to generate unique IDs for each resource.
struct ResourceIdentityFactory<T> {
    /// Fallback counter for targets without 64-bit atomics.
    #[cfg(not(target_has_atomic = "64"))]
    next_id: Mutex<u64>,
    /// Monotonically increasing counter of issued IDs.
    #[cfg(target_has_atomic = "64")]
    next_id: core::sync::atomic::AtomicU64,
    /// Ties the factory to `T` without storing a `T`.
    _phantom: PhantomData<T>,
}
1086
impl<T> ResourceIdentityFactory<T> {
    /// Creates a factory whose first issued ID is 0.
    fn new() -> Self {
        Self {
            #[cfg(not(target_has_atomic = "64"))]
            next_id: Mutex::new(0),
            #[cfg(target_has_atomic = "64")]
            next_id: core::sync::atomic::AtomicU64::new(0),
            _phantom: PhantomData,
        }
    }

    /// Returns a new unique ID for a resource of type `T`.
    fn next(&self) -> ResourceIdentity<T> {
        // Fallback for targets without 64-bit atomics: serialize through a mutex.
        #[cfg(not(target_has_atomic = "64"))]
        {
            let mut next_id = self.next_id.lock();
            let id = *next_id;
            *next_id += 1;
            ResourceIdentity {
                id,
                _phantom: PhantomData,
            }
        }

        // `Relaxed` is sufficient: each caller only needs a unique value, not
        // any ordering with other memory operations.
        #[cfg(target_has_atomic = "64")]
        ResourceIdentity {
            id: self
                .next_id
                .fetch_add(1, core::sync::atomic::Ordering::Relaxed),
            _phantom: PhantomData,
        }
    }
}
1120
/// A unique identifier for a resource of type `T`.
///
/// This is used as a hashable key for resources, which
/// is permanently unique through the lifetime of the program.
#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
struct ResourceIdentity<T> {
    /// Value issued by [`ResourceIdentityFactory::next`].
    id: u64,
    /// Ties the identity to `T` without storing a `T`.
    _phantom: PhantomData<T>,
}
1130
/// Hash-map key identifying a cached `VkFramebuffer`.
#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    raw_pass: vk::RenderPass,
    /// Because this is used as a key in a hash map, we need to include the identity
    /// so that this hashes differently, even if the ImageView handles are the same
    /// between different views.
    attachment_identities: ArrayVec<ResourceIdentity<vk::ImageView>, { MAX_TOTAL_ATTACHMENTS }>,
    /// While this is redundant for calculating the hash, we need access to an array
    /// of all the raw ImageViews when we are creating the actual framebuffer,
    /// so we store this here.
    attachment_views: ArrayVec<vk::ImageView, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
}
1144
1145impl FramebufferKey {
1146 fn push_view(&mut self, view: IdentifiedTextureView) {
1147 self.attachment_identities.push(view.identity);
1148 self.attachment_views.push(view.raw);
1149 }
1150}
1151
/// A texture view paired with its identity.
#[derive(Copy, Clone)]
struct IdentifiedTextureView {
    raw: vk::ImageView,
    /// Process-unique identity of `raw`; see [`ResourceIdentity`].
    identity: ResourceIdentity<vk::ImageView>,
}
1158
/// Hash-map key for cached temporary image views;
/// see [`CommandEncoder::temp_texture_views`].
#[derive(Clone, Eq, Hash, PartialEq)]
struct TempTextureViewKey {
    texture: vk::Image,
    /// As this is used in a hashmap, we need to
    /// include the identity so that this hashes differently,
    /// even if the Image handles are the same between different images.
    texture_identity: ResourceIdentity<vk::Image>,
    format: vk::Format,
    mip_level: u32,
    depth_slice: u32,
}
1170
pub struct CommandEncoder {
    /// The pool all of this encoder's command buffers are allocated from.
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active renderpass enabled a debug span,
    /// and needs to be disabled on renderpass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,

    /// Cache of framebuffers created by this encoder; destroyed on drop.
    framebuffers: FastHashMap<FramebufferKey, vk::Framebuffer>,
    /// Cache of temporary image views created by this encoder; destroyed on drop.
    temp_texture_views: FastHashMap<TempTextureViewKey, IdentifiedTextureView>,

    counters: Arc<wgt::HalCounters>,
}
1212
impl Drop for CommandEncoder {
    fn drop(&mut self) {
        // SAFETY:
        //
        // VUID-vkDestroyCommandPool-commandPool-00041: wgpu_hal requires that a
        // `CommandBuffer` must live until its execution is complete, and that a
        // `CommandBuffer` must not outlive the `CommandEncoder` that built it.
        // Thus, we know that none of our `CommandBuffers` are in the "pending"
        // state.
        //
        // The other VUIDs are pretty obvious.
        unsafe {
            // `vkDestroyCommandPool` also frees any command buffers allocated
            // from that pool, so there's no need to explicitly call
            // `vkFreeCommandBuffers` on `cmd_encoder`'s `free` and `discarded`
            // fields.
            self.device.raw.destroy_command_pool(self.raw, None);
        }

        // The cached framebuffers and temporary views are owned by this
        // encoder, so destroy them along with it.
        for (_, fb) in self.framebuffers.drain() {
            unsafe { self.device.raw.destroy_framebuffer(fb, None) };
        }

        for (_, view) in self.temp_texture_views.drain() {
            unsafe { self.device.raw.destroy_image_view(view.raw, None) };
        }

        self.counters.command_encoders.sub(1);
    }
}
1243
impl CommandEncoder {
    /// Returns the currently active raw command buffer.
    ///
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}
1252
1253impl fmt::Debug for CommandEncoder {
1254 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1255 f.debug_struct("CommandEncoder")
1256 .field("raw", &self.raw)
1257 .finish()
1258 }
1259}
1260
/// A raw Vulkan command buffer recorded by a [`CommandEncoder`].
#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

impl crate::DynCommandBuffer for CommandBuffer {}
1267
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    /// An already-created `VkShaderModule`.
    Raw(vk::ShaderModule),
    /// Naga IR kept for later compilation, together with the runtime-check
    /// configuration it should be compiled with.
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: wgt::ShaderRuntimeChecks,
    },
}

impl crate::DynShaderModule for ShaderModule {}
1279
/// A compiled graphics pipeline.
#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

impl crate::DynRenderPipeline for RenderPipeline {}

/// A compiled compute pipeline.
#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

impl crate::DynComputePipeline for ComputePipeline {}

/// A raw Vulkan pipeline cache.
#[derive(Debug)]
pub struct PipelineCache {
    raw: vk::PipelineCache,
}

impl crate::DynPipelineCache for PipelineCache {}

/// A query pool backing a wgpu query set.
#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

impl crate::DynQuerySet for QuerySet {}
1307
/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl crate::DynFence for Fence {}
1358
impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed
                    && device
                        .get_fence_status(raw)
                        .map_err(map_host_device_oom_and_lost_err)?
                {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::timeline_semaphore::Device>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                // The timeline's counter value IS the fence value; query it via
                // the extension fn or the promoted core entry point.
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                    ExtensionFn::Promoted => device
                        .get_semaphore_counter_value(raw)
                        .map_err(map_host_device_oom_and_lost_err)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                // Move every fence whose value has been reached into `free`,
                // remembering where the newly freed ones start so we can reset
                // just those.
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    // Unsignal the recycled fences so `Queue::submit` can use
                    // them again.
                    unsafe { device.reset_fences(&free[base_free..]) }
                        .map_err(map_device_oom_err)?
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}
1452
impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        // The raw fence handed to `vkQueueSubmit`; stays null when the wgpu
        // fence is a timeline semaphore (signaled via the semaphore list
        // instead).
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = SemaphoreList::default();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.acquire_semaphores) as usize);
                    check.insert(Arc::as_ptr(&st.present_semaphores) as usize);
                }
                check.len() == surface_textures.len() * 2
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                let acquire = st
                    .acquire_semaphores
                    .try_lock()
                    .expect("Failed to lock surface acquire semaphore");
                let present = st
                    .present_semaphores
                    .try_lock()
                    .expect("Failed to lock surface present semaphore");

                (acquire, present)
            })
            .collect::<Vec<_>>();

        for (mut acquire_semaphore, mut present_semaphores) in locked_swapchain_semaphores {
            acquire_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = acquire_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = present_semaphores.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push_binary(signal_semaphore);
        }

        // Drain any user-registered semaphores
        // (see `Queue::add_signal_semaphore`).
        let mut guard = self.signal_semaphores.lock();
        if !guard.is_empty() {
            signal_semaphores.append(&mut guard);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push_binary(semaphore_state.signal);

        // We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push_timeline(raw, signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                // Reuse a recycled fence if one is free; otherwise create a
                // new one (it is returned to `free` by `Fence::maintain`).
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)
                            .map_err(map_host_device_oom_err)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::default().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks);

        // Storage for the timeline-semaphore info that `add_to_submit` may
        // chain onto `vk_info`; declared here so it outlives the submit call.
        let mut vk_timeline_info = mem::MaybeUninit::uninit();
        vk_info = signal_semaphores.add_to_submit(vk_info, &mut vk_timeline_info);

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info], fence_raw)
                .map_err(map_host_device_oom_and_lost_err)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut acquire_semaphore = texture.acquire_semaphores.lock();
        let mut present_semaphores = texture.present_semaphores.lock();

        let wait_semaphores = present_semaphores.get_present_wait_semaphores();

        // Reset the acquire and present semaphores internal state
        // to be ready for the next frame.
        //
        // We do this before the actual call to present to ensure that
        // even if this method errors and early outs, we have reset
        // the state for next frame.
        acquire_semaphore.end_semaphore_usage();
        present_semaphores.end_semaphore_usage();

        drop(acquire_semaphore);

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::default()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(&wait_semaphores);

        // Declared out here so they outlive `vk_info` when chained below.
        let mut display_timing;
        let present_times;
        let vk_info = if let Some(present_time) = ssc.next_present_time.take() {
            debug_assert!(
                ssc.device
                    .features
                    .contains(wgt::Features::VULKAN_GOOGLE_DISPLAY_TIMING),
                "`next_present_time` should only be set if `VULKAN_GOOGLE_DISPLAY_TIMING` is enabled"
            );
            present_times = [present_time];
            display_timing = vk::PresentTimesInfoGOOGLE::default().times(&present_times);
            // SAFETY: We know that VK_GOOGLE_display_timing is present because of the safety contract on `next_present_time`.
            vk_info.push_next(&mut display_timing)
        } else {
            vk_info
        };

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    // We don't use VK_EXT_full_screen_exclusive
                    // VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT
                    _ => map_host_device_oom_and_lost_err(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns `VK_SUBOPTIMAL_KHR` if not doing pre-rotation
            // (i.e `VkSwapchainCreateInfoKHR::preTransform` not being equal to the current device orientation).
            // This is always the case when the device orientation is anything other than the identity one, as we unconditionally use `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}
1654
impl Queue {
    /// Returns the underlying `ash` device this queue was created from.
    pub fn raw_device(&self) -> &ash::Device {
        &self.device.raw
    }

    /// Registers a semaphore to be signaled by the next call to [`submit`].
    ///
    /// Pass `Some(value)` for a timeline semaphore, `None` for a binary one.
    ///
    /// [`submit`]: crate::Queue::submit
    pub fn add_signal_semaphore(&self, semaphore: vk::Semaphore, semaphore_value: Option<u64>) {
        let mut guard = self.signal_semaphores.lock();
        if let Some(value) = semaphore_value {
            guard.push_timeline(semaphore, value);
        } else {
            guard.push_binary(semaphore);
        }
    }
}
1669
1670/// Maps
1671///
1672/// - VK_ERROR_OUT_OF_HOST_MEMORY
1673/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1674fn map_host_device_oom_err(err: vk::Result) -> crate::DeviceError {
1675 match err {
1676 vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
1677 get_oom_err(err)
1678 }
1679 e => get_unexpected_err(e),
1680 }
1681}
1682
1683/// Maps
1684///
1685/// - VK_ERROR_OUT_OF_HOST_MEMORY
1686/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1687/// - VK_ERROR_DEVICE_LOST
1688fn map_host_device_oom_and_lost_err(err: vk::Result) -> crate::DeviceError {
1689 match err {
1690 vk::Result::ERROR_DEVICE_LOST => get_lost_err(),
1691 other => map_host_device_oom_err(other),
1692 }
1693}
1694
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_device_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so that case falls through to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1705
1706/// Maps
1707///
1708/// - VK_ERROR_OUT_OF_HOST_MEMORY
1709fn map_host_oom_err(err: vk::Result) -> crate::DeviceError {
1710 match err {
1711 vk::Result::ERROR_OUT_OF_HOST_MEMORY => get_oom_err(err),
1712 e => get_unexpected_err(e),
1713 }
1714}
1715
1716/// Maps
1717///
1718/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
1719fn map_device_oom_err(err: vk::Result) -> crate::DeviceError {
1720 match err {
1721 vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => get_oom_err(err),
1722 e => get_unexpected_err(e),
1723 }
1724}
1725
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
fn map_host_oom_and_ioca_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_KHR_buffer_device_address
    // VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR
    // so that case falls through to the plain OOM mapping.
    map_host_oom_err(err)
}
1735
/// Maps
///
/// - VK_ERROR_OUT_OF_HOST_MEMORY
/// - VK_ERROR_OUT_OF_DEVICE_MEMORY
/// - VK_PIPELINE_COMPILE_REQUIRED_EXT
/// - VK_ERROR_INVALID_SHADER_NV
fn map_pipeline_err(err: vk::Result) -> crate::DeviceError {
    // We don't use VK_EXT_pipeline_creation_cache_control
    // VK_PIPELINE_COMPILE_REQUIRED_EXT
    // We don't use VK_NV_glsl_shader
    // VK_ERROR_INVALID_SHADER_NV
    // so those cases fall through to the plain OOM mapping.
    map_host_device_oom_err(err)
}
1749
/// Returns [`crate::DeviceError::Unexpected`] or panics if the `internal_error_panic`
/// feature flag is enabled.
fn get_unexpected_err(_err: vk::Result) -> crate::DeviceError {
    #[cfg(feature = "internal_error_panic")]
    panic!("Unexpected Vulkan error: {_err:?}");

    // `unreachable_code` only fires when the panic above is compiled in.
    #[allow(unreachable_code)]
    crate::DeviceError::Unexpected
}
1759
/// Returns [`crate::DeviceError::OutOfMemory`].
///
/// The error value is currently unused; the parameter keeps the signature
/// parallel with [`get_unexpected_err`].
fn get_oom_err(_err: vk::Result) -> crate::DeviceError {
    crate::DeviceError::OutOfMemory
}
1764
/// Returns [`crate::DeviceError::Lost`] or panics if the `device_lost_panic`
/// feature flag is enabled.
fn get_lost_err() -> crate::DeviceError {
    #[cfg(feature = "device_lost_panic")]
    panic!("Device lost");

    // `unreachable_code` only fires when the panic above is compiled in.
    #[allow(unreachable_code)]
    crate::DeviceError::Lost
}
1774
/// TLAS instance data, `#[repr(C)]` and `Pod` so it can be written directly
/// into GPU buffers.
///
/// NOTE(review): field layout and bit packing are assumed to mirror
/// `VkAccelerationStructureInstanceKHR` — confirm against the spec.
#[derive(Clone, Copy, Pod, Zeroable)]
#[repr(C)]
struct RawTlasInstance {
    /// 3x4 transform matrix.
    transform: [f32; 12],
    /// Packed instance custom index and visibility mask.
    custom_data_and_mask: u32,
    /// Packed SBT record offset and instance flags.
    shader_binding_table_record_offset_and_flags: u32,
    /// Reference (handle or device address) to the acceleration structure.
    acceleration_structure_reference: u64,
}
1783
/// Arguments to the [`CreateDeviceCallback`].
pub struct CreateDeviceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the device. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The physical device features to enable. You may enable features, but must not disable any.
    pub device_features: &'arg mut PhysicalDeviceFeatures,
    /// The queue create infos for the device. You may add or modify queue create infos as needed.
    pub queue_create_infos: &'arg mut Vec<vk::DeviceQueueCreateInfo<'pnext>>,
    /// The create info for the device. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// or to the feature set, as all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::DeviceCreateInfo<'pnext>,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan device creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateDeviceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateDeviceCallbackArgs<'arg, 'pnext, 'this>) + 'this;
1815
/// Arguments to the [`CreateInstanceCallback`].
pub struct CreateInstanceCallbackArgs<'arg, 'pnext, 'this>
where
    'this: 'pnext,
{
    /// The extensions to enable for the instance. You must not remove anything from this list,
    /// but you may add to it.
    pub extensions: &'arg mut Vec<&'static CStr>,
    /// The create info for the instance. You may add or modify things in the pnext chain, but
    /// do not turn features off. Additionally, do not add things to the list of extensions,
    /// all changes to that member will be overwritten.
    pub create_info: &'arg mut vk::InstanceCreateInfo<'pnext>,
    /// Vulkan entry point.
    pub entry: &'arg ash::Entry,
    /// We need to have `'this` in the struct, so we can declare that all lifetimes coming from
    /// captures in the closure will live longer (and hence satisfy) `'pnext`. However, we
    /// don't actually directly use `'this`
    _phantom: PhantomData<&'this ()>,
}

/// Callback to allow changing the vulkan instance creation parameters.
///
/// # Safety:
/// - If you want to add extensions, add them to the `Vec<&'static CStr>`, not to the create
///   info, as the create info value will be overwritten.
/// - Callback must not remove features.
/// - Callback must not change anything to what the instance does not support.
pub type CreateInstanceCallback<'this> =
    dyn for<'arg, 'pnext> FnOnce(CreateInstanceCallbackArgs<'arg, 'pnext, 'this>) + 'this;