wgpu_examples/timestamp_queries/
mod.rs

1//! Sample demonstrating different kinds of gpu timestamp queries.
2//!
3//! Timestamp queries are typically used to profile how long certain operations take on the GPU.
4//! wgpu has several ways of performing gpu timestamp queries:
5//! * passing `wgpu::RenderPassTimestampWrites`/`wgpu::ComputePassTimestampWrites` during render/compute pass creation.
6//!   This writes timestamps for the beginning and end of a given pass.
7//!   (enabled with wgpu::Features::TIMESTAMP_QUERY)
8//! * `wgpu::CommandEncoder::write_timestamp` writes a timestamp between any commands recorded on an encoder.
9//!   (enabled with wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
//! * `wgpu::RenderPass/ComputePass::write_timestamp` writes a timestamp within commands of a render or compute pass.
11//!   Note that some GPU architectures do not support this.
12//!   (native only, enabled with wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
13//!
14//! Any timestamp is written to a `wgpu::QuerySet` which needs to be resolved to a buffer with `wgpu::BufferUsages::QUERY_RESOLVE`.
15//! Since this usage is incompatible with `wgpu::BufferUsages::MAP_READ` we need to copy the resolved timestamps to a separate buffer afterwards.
16//!
17//! The period, i.e. the unit of time, of the timestamps in wgpu is undetermined and needs to be queried with `wgpu::Queue::get_timestamp_period`
18//! in order to get comparable results.
19
20use wgpu::util::DeviceExt;
21
/// GPU resources needed to record timestamps and read them back on the CPU.
struct Queries {
    /// Timestamp query set that the encoder and both passes write into.
    set: wgpu::QuerySet,
    /// Buffer the query set is resolved into (`QUERY_RESOLVE | COPY_SRC`).
    resolve_buffer: wgpu::Buffer,
    /// Mappable buffer the resolved timestamps are copied to (`COPY_DST | MAP_READ`).
    destination_buffer: wgpu::Buffer,
    /// Capacity of the query set; both buffers hold this many `u64` values.
    num_queries: u64,
    /// Index of the next query slot that has not been written yet.
    next_unused_query: u32,
}
29
/// Raw timestamps read back from the GPU, grouped by what they measure.
struct QueryResults {
    /// Timestamps written on the command encoder before the render pass and after the compute pass.
    encoder_timestamps: [u64; 2],
    /// Beginning and end of the render pass.
    render_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two draws; `None` without timestamps inside passes.
    render_inside_timestamp: Option<u64>,
    /// Beginning and end of the compute pass.
    compute_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two dispatches; `None` without timestamps inside passes.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39    // Queries:
40    // * encoder timestamp start
41    // * encoder timestamp end
42    // * render start
43    // * render in-between (optional)
44    // * render end
45    // * compute start
46    // * compute in-between (optional)
47    // * compute end
48    const NUM_QUERIES: u64 = 8;
49
50    #[expect(
51        clippy::redundant_closure,
52        reason = "false positive for `get_next_slot`, which needs to be used by reference"
53    )]
54    fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55        assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57        let mut next_slot = 0;
58        let mut get_next_slot = || {
59            let slot = timestamps[next_slot];
60            next_slot += 1;
61            slot
62        };
63
64        let mut encoder_timestamps = [0, 0];
65        encoder_timestamps[0] = get_next_slot();
66        let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67        let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68        let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69        let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70        encoder_timestamps[1] = get_next_slot();
71
72        QueryResults {
73            encoder_timestamps,
74            render_start_end_timestamps,
75            render_inside_timestamp,
76            compute_start_end_timestamps,
77            compute_inside_timestamp,
78        }
79    }
80
81    fn print(&self, queue: &wgpu::Queue) {
82        let period = queue.get_timestamp_period();
83        let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85        println!(
86            "Elapsed time before render until after compute: {:.2} μs",
87            elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88        );
89        println!(
90            "Elapsed time render pass: {:.2} μs",
91            elapsed_us(
92                self.render_start_end_timestamps[0],
93                self.render_start_end_timestamps[1]
94            )
95        );
96        if let Some(timestamp) = self.render_inside_timestamp {
97            println!(
98                "Elapsed time first triangle: {:.2} μs",
99                elapsed_us(self.render_start_end_timestamps[0], timestamp)
100            );
101        }
102        println!(
103            "Elapsed time compute pass: {:.2} μs",
104            elapsed_us(
105                self.compute_start_end_timestamps[0],
106                self.compute_start_end_timestamps[1]
107            )
108        );
109        if let Some(timestamp) = self.compute_inside_timestamp {
110            println!(
111                "Elapsed time after first dispatch: {:.2} μs",
112                elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113            );
114        }
115    }
116}
117
118impl Queries {
119    fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120        Queries {
121            set: device.create_query_set(&wgpu::QuerySetDescriptor {
122                label: Some("Timestamp query set"),
123                count: num_queries as _,
124                ty: wgpu::QueryType::Timestamp,
125            }),
126            resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127                label: Some("query resolve buffer"),
128                size: size_of::<u64>() as u64 * num_queries,
129                usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130                mapped_at_creation: false,
131            }),
132            destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133                label: Some("query dest buffer"),
134                size: size_of::<u64>() as u64 * num_queries,
135                usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136                mapped_at_creation: false,
137            }),
138            num_queries,
139            next_unused_query: 0,
140        }
141    }
142
143    fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144        encoder.resolve_query_set(
145            &self.set,
146            // TODO(https://github.com/gfx-rs/wgpu/issues/3993): Musn't be larger than the number valid queries in the set.
147            0..self.next_unused_query,
148            &self.resolve_buffer,
149            0,
150        );
151        encoder.copy_buffer_to_buffer(
152            &self.resolve_buffer,
153            0,
154            &self.destination_buffer,
155            0,
156            self.resolve_buffer.size(),
157        );
158    }
159
160    fn wait_for_results(&self, device: &wgpu::Device, is_test_on_metal: bool) -> Vec<u64> {
161        self.destination_buffer
162            .slice(..)
163            .map_async(wgpu::MapMode::Read, |_| ());
164        let poll_type = if is_test_on_metal {
165            // Use a short timeout because the `timestamps_encoder` test (which
166            // is also marked as flaky) has been observed to hang on Metal.
167            //
168            // Note that a timeout here is *not* considered an error. In this
169            // particular case that is what we want, but in general, waits in
170            // tests should probably treat a timeout as an error.
171            wgpu::PollType::Wait {
172                submission_index: None,
173                timeout: Some(std::time::Duration::from_secs(5)),
174            }
175        } else {
176            wgpu::PollType::wait_indefinitely()
177        };
178        device.poll(poll_type).unwrap();
179
180        let timestamps = {
181            let timestamp_view = self
182                .destination_buffer
183                .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
184                .get_mapped_range()
185                .unwrap();
186            bytemuck::allocation::pod_collect_to_vec(&timestamp_view)
187        };
188
189        self.destination_buffer.unmap();
190
191        timestamps
192    }
193}
194
195async fn run() {
196    // Instantiates instance of wgpu
197    let instance =
198        wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
199
200    // `request_adapter` instantiates the general connection to the GPU
201    let adapter = instance
202        .request_adapter(&wgpu::RequestAdapterOptions::default())
203        .await
204        .expect("Failed to request adapter.");
205
206    // Check timestamp features.
207    let features = adapter.features()
208        & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
209    if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
210        println!("Adapter supports timestamp queries.");
211    } else {
212        println!("Adapter does not support timestamp queries, aborting.");
213        return;
214    }
215    let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
216    if timestamps_inside_passes {
217        println!("Adapter supports timestamp queries within passes.");
218    } else {
219        println!("Adapter does not support timestamp queries within passes.");
220    }
221
222    // `request_device` instantiates the feature specific connection to the GPU, defining some parameters,
223    //  `features` being the available features.
224    let (device, queue) = adapter
225        .request_device(&wgpu::DeviceDescriptor {
226            label: None,
227            required_features: features,
228            required_limits: wgpu::Limits::downlevel_defaults(),
229            experimental_features: wgpu::ExperimentalFeatures::disabled(),
230            memory_hints: wgpu::MemoryHints::MemoryUsage,
231            trace: wgpu::Trace::Off,
232        })
233        .await
234        .unwrap();
235
236    let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
237    let raw_results = queries.wait_for_results(&device, false);
238    println!("Raw timestamp buffer contents: {raw_results:?}");
239    QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
240}
241
242fn submit_render_and_compute_pass_with_queries(
243    device: &wgpu::Device,
244    queue: &wgpu::Queue,
245) -> Queries {
246    let mut encoder =
247        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
248
249    let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
250    let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
251
252    if device
253        .features()
254        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
255    {
256        encoder.write_timestamp(&queries.set, queries.next_unused_query);
257        queries.next_unused_query += 1;
258    }
259
260    // Render two triangles and profile it.
261    render_pass(
262        device,
263        &shader,
264        &mut encoder,
265        &queries.set,
266        &mut queries.next_unused_query,
267    );
268
269    // Compute a hash function on a single thread a bunch of time and profile it.
270    compute_pass(
271        device,
272        &shader,
273        &mut encoder,
274        &queries.set,
275        &mut queries.next_unused_query,
276    );
277
278    if device
279        .features()
280        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
281    {
282        encoder.write_timestamp(&queries.set, queries.next_unused_query);
283        queries.next_unused_query += 1;
284    }
285
286    queries.resolve(&mut encoder);
287    queue.submit(Some(encoder.finish()));
288
289    queries
290}
291
292fn compute_pass(
293    device: &wgpu::Device,
294    module: &wgpu::ShaderModule,
295    encoder: &mut wgpu::CommandEncoder,
296    query_set: &wgpu::QuerySet,
297    next_unused_query: &mut u32,
298) {
299    let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
300        label: Some("Storage Buffer"),
301        contents: bytemuck::cast_slice(&[42]),
302        usage: wgpu::BufferUsages::STORAGE,
303    });
304    let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
305        label: None,
306        layout: None,
307        module,
308        entry_point: Some("main_cs"),
309        compilation_options: Default::default(),
310        cache: None,
311    });
312    let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
313    let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
314        label: None,
315        layout: &bind_group_layout,
316        entries: &[wgpu::BindGroupEntry {
317            binding: 0,
318            resource: storage_buffer.as_entire_binding(),
319        }],
320    });
321
322    let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
323        label: None,
324        timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
325            query_set,
326            beginning_of_pass_write_index: Some(*next_unused_query),
327            end_of_pass_write_index: Some(*next_unused_query + 1),
328        }),
329    });
330    *next_unused_query += 2;
331    cpass.set_pipeline(&compute_pipeline);
332    cpass.set_bind_group(0, &bind_group, &[]);
333    cpass.dispatch_workgroups(1, 1, 1);
334    if device
335        .features()
336        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
337    {
338        cpass.write_timestamp(query_set, *next_unused_query);
339        *next_unused_query += 1;
340    }
341    cpass.dispatch_workgroups(1, 1, 1);
342}
343
344fn render_pass(
345    device: &wgpu::Device,
346    module: &wgpu::ShaderModule,
347    encoder: &mut wgpu::CommandEncoder,
348    query_set: &wgpu::QuerySet,
349    next_unused_query: &mut u32,
350) {
351    let format = wgpu::TextureFormat::Rgba8Unorm;
352
353    let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
354        label: None,
355        bind_group_layouts: &[],
356        immediate_size: 0,
357    });
358
359    let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
360        label: None,
361        layout: Some(&pipeline_layout),
362        vertex: wgpu::VertexState {
363            module,
364            entry_point: Some("vs_main"),
365            compilation_options: Default::default(),
366            buffers: &[],
367        },
368        fragment: Some(wgpu::FragmentState {
369            module,
370            entry_point: Some("fs_main"),
371            compilation_options: Default::default(),
372            targets: &[Some(format.into())],
373        }),
374        primitive: wgpu::PrimitiveState::default(),
375        depth_stencil: None,
376        multisample: wgpu::MultisampleState::default(),
377        multiview_mask: None,
378        cache: None,
379    });
380    let render_target = device.create_texture(&wgpu::TextureDescriptor {
381        label: Some("rendertarget"),
382        size: wgpu::Extent3d {
383            width: 512,
384            height: 512,
385            depth_or_array_layers: 1,
386        },
387        mip_level_count: 1,
388        sample_count: 1,
389        dimension: wgpu::TextureDimension::D2,
390        format,
391        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
392        view_formats: &[format],
393    });
394    let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
395
396    let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
397        label: None,
398        color_attachments: &[Some(wgpu::RenderPassColorAttachment {
399            view: &render_target_view,
400            depth_slice: None,
401            resolve_target: None,
402            ops: wgpu::Operations {
403                load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
404                store: wgpu::StoreOp::Store,
405            },
406        })],
407        depth_stencil_attachment: None,
408        timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
409            query_set,
410            beginning_of_pass_write_index: Some(*next_unused_query),
411            end_of_pass_write_index: Some(*next_unused_query + 1),
412        }),
413        occlusion_query_set: None,
414        multiview_mask: None,
415    });
416    *next_unused_query += 2;
417
418    rpass.set_pipeline(&render_pipeline);
419
420    rpass.draw(0..3, 0..1);
421    if device
422        .features()
423        .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
424    {
425        rpass.write_timestamp(query_set, *next_unused_query);
426        *next_unused_query += 1;
427    }
428
429    rpass.draw(0..3, 0..1);
430}
431
432pub fn main() {
433    #[cfg(not(target_arch = "wasm32"))]
434    {
435        env_logger::init();
436        pollster::block_on(run());
437    }
438    #[cfg(target_arch = "wasm32")]
439    {
440        std::panic::set_hook(Box::new(console_error_panic_hook::hook));
441        console_log::init().expect("could not initialize logger");
442        wasm_bindgen_futures::spawn_local(run());
443    }
444}
445
/// GPU tests that submit the example workload and sanity-check the timestamps.
#[cfg(test)]
pub mod tests {
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    /// Only pass-boundary timestamps are enabled.
    #[gpu_test]
    pub static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    /// Pass-boundary and encoder timestamps are enabled.
    #[gpu_test]
    pub static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                )
                // see https://github.com/gfx-rs/wgpu/issues/2521
                // If marking this test non-flaky, also consider removing the silent
                // timeout in `wait_for_results`.
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    /// Pass-boundary, encoder and in-pass timestamps are all enabled.
    #[gpu_test]
    pub static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                // see https://github.com/gfx-rs/wgpu/issues/2521
                // If marking this test non-flaky, also consider removing the silent
                // timeout in `wait_for_results`.
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Submits the example workload and checks the timestamps for consistency.
    ///
    /// `timestamps_on_encoder` enables the checks on the encoder timestamps;
    /// `timestamps_inside_passes` tells the decoder whether in-pass slots exist.
    /// Every failing check panics with "unexpected timestamp", which is the
    /// message the flaky configurations above expect.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let is_metal = ctx.adapter.get_info().backend == wgpu::Backend::Metal;
        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let raw_results = queries.wait_for_results(&ctx.device, is_metal);
        let QueryResults {
            encoder_timestamps,
            render_start_end_timestamps,
            render_inside_timestamp,
            compute_start_end_timestamps,
            compute_inside_timestamp,
        } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);

        // Timestamps may wrap around, so we can really only reason about deltas!
        // Making things worse, deltas are allowed to be zero.
        let render_delta =
            render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
        let compute_delta =
            compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
        let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0, "unexpected timestamp");
            // Bogus timestamps can make the sum overflow; report that through the
            // same assertion message instead of an arithmetic overflow panic.
            let passes_delta = render_delta.checked_add(compute_delta);
            assert!(
                passes_delta.is_some_and(|passes_delta| encoder_delta >= passes_delta),
                "unexpected timestamp"
            );
        }
        if let Some(render_inside_timestamp) = render_inside_timestamp {
            // Wrap-safe form of `start <= inside <= end`: the distance from the
            // start to the in-pass timestamp must not exceed the pass duration.
            let inside_delta = render_inside_timestamp.wrapping_sub(render_start_end_timestamps[0]);
            assert!(inside_delta <= render_delta, "unexpected timestamp");
        }
        if let Some(compute_inside_timestamp) = compute_inside_timestamp {
            // Same wrap-safe check for the compute pass.
            let inside_delta =
                compute_inside_timestamp.wrapping_sub(compute_start_end_timestamps[0]);
            assert!(inside_delta <= compute_delta, "unexpected timestamp");
        }
    }
}