1use wgpu::util::DeviceExt;
21
22struct Queries {
23 set: wgpu::QuerySet,
24 resolve_buffer: wgpu::Buffer,
25 destination_buffer: wgpu::Buffer,
26 num_queries: u64,
27 next_unused_query: u32,
28}
29
/// Timestamps read back from the query set, grouped by where they were recorded.
struct QueryResults {
    /// `[start, end]` timestamps recorded on the command encoder, around both passes.
    encoder_timestamps: [u64; 2],
    /// `[start, end]` timestamps recorded at the render pass boundaries.
    render_start_end_timestamps: [u64; 2],
    /// Timestamp recorded inside the render pass, when in-pass queries are in use.
    render_inside_timestamp: Option<u64>,
    /// `[start, end]` timestamps recorded at the compute pass boundaries.
    compute_start_end_timestamps: [u64; 2],
    /// Timestamp recorded inside the compute pass, when in-pass queries are in use.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39 const NUM_QUERIES: u64 = 8;
49
50 #[expect(
51 clippy::redundant_closure,
52 reason = "false positive for `get_next_slot`, which needs to be used by reference"
53 )]
54 fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55 assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57 let mut next_slot = 0;
58 let mut get_next_slot = || {
59 let slot = timestamps[next_slot];
60 next_slot += 1;
61 slot
62 };
63
64 let mut encoder_timestamps = [0, 0];
65 encoder_timestamps[0] = get_next_slot();
66 let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67 let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68 let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69 let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70 encoder_timestamps[1] = get_next_slot();
71
72 QueryResults {
73 encoder_timestamps,
74 render_start_end_timestamps,
75 render_inside_timestamp,
76 compute_start_end_timestamps,
77 compute_inside_timestamp,
78 }
79 }
80
81 fn print(&self, queue: &wgpu::Queue) {
82 let period = queue.get_timestamp_period();
83 let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85 println!(
86 "Elapsed time before render until after compute: {:.2} μs",
87 elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88 );
89 println!(
90 "Elapsed time render pass: {:.2} μs",
91 elapsed_us(
92 self.render_start_end_timestamps[0],
93 self.render_start_end_timestamps[1]
94 )
95 );
96 if let Some(timestamp) = self.render_inside_timestamp {
97 println!(
98 "Elapsed time first triangle: {:.2} μs",
99 elapsed_us(self.render_start_end_timestamps[0], timestamp)
100 );
101 }
102 println!(
103 "Elapsed time compute pass: {:.2} μs",
104 elapsed_us(
105 self.compute_start_end_timestamps[0],
106 self.compute_start_end_timestamps[1]
107 )
108 );
109 if let Some(timestamp) = self.compute_inside_timestamp {
110 println!(
111 "Elapsed time after first dispatch: {:.2} μs",
112 elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113 );
114 }
115 }
116}
117
118impl Queries {
119 fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120 Queries {
121 set: device.create_query_set(&wgpu::QuerySetDescriptor {
122 label: Some("Timestamp query set"),
123 count: num_queries as _,
124 ty: wgpu::QueryType::Timestamp,
125 }),
126 resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127 label: Some("query resolve buffer"),
128 size: size_of::<u64>() as u64 * num_queries,
129 usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130 mapped_at_creation: false,
131 }),
132 destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133 label: Some("query dest buffer"),
134 size: size_of::<u64>() as u64 * num_queries,
135 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136 mapped_at_creation: false,
137 }),
138 num_queries,
139 next_unused_query: 0,
140 }
141 }
142
143 fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144 encoder.resolve_query_set(
145 &self.set,
146 0..self.next_unused_query,
148 &self.resolve_buffer,
149 0,
150 );
151 encoder.copy_buffer_to_buffer(
152 &self.resolve_buffer,
153 0,
154 &self.destination_buffer,
155 0,
156 self.resolve_buffer.size(),
157 );
158 }
159
160 fn wait_for_results(&self, device: &wgpu::Device, is_test_on_metal: bool) -> Vec<u64> {
161 self.destination_buffer
162 .slice(..)
163 .map_async(wgpu::MapMode::Read, |_| ());
164 let poll_type = if is_test_on_metal {
165 wgpu::PollType::Wait {
172 submission_index: None,
173 timeout: Some(std::time::Duration::from_secs(5)),
174 }
175 } else {
176 wgpu::PollType::wait_indefinitely()
177 };
178 device.poll(poll_type).unwrap();
179
180 let timestamps = {
181 let timestamp_view = self
182 .destination_buffer
183 .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
184 .get_mapped_range()
185 .unwrap();
186 bytemuck::allocation::pod_collect_to_vec(×tamp_view)
187 };
188
189 self.destination_buffer.unmap();
190
191 timestamps
192 }
193}
194
195async fn run() {
196 let instance =
198 wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
199
200 let adapter = instance
202 .request_adapter(&wgpu::RequestAdapterOptions::default())
203 .await
204 .expect("Failed to request adapter.");
205
206 let features = adapter.features()
208 & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
209 if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
210 println!("Adapter supports timestamp queries.");
211 } else {
212 println!("Adapter does not support timestamp queries, aborting.");
213 return;
214 }
215 let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
216 if timestamps_inside_passes {
217 println!("Adapter supports timestamp queries within passes.");
218 } else {
219 println!("Adapter does not support timestamp queries within passes.");
220 }
221
222 let (device, queue) = adapter
225 .request_device(&wgpu::DeviceDescriptor {
226 label: None,
227 required_features: features,
228 required_limits: wgpu::Limits::downlevel_defaults(),
229 experimental_features: wgpu::ExperimentalFeatures::disabled(),
230 memory_hints: wgpu::MemoryHints::MemoryUsage,
231 trace: wgpu::Trace::Off,
232 })
233 .await
234 .unwrap();
235
236 let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
237 let raw_results = queries.wait_for_results(&device, false);
238 println!("Raw timestamp buffer contents: {raw_results:?}");
239 QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
240}
241
242fn submit_render_and_compute_pass_with_queries(
243 device: &wgpu::Device,
244 queue: &wgpu::Queue,
245) -> Queries {
246 let mut encoder =
247 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
248
249 let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
250 let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
251
252 if device
253 .features()
254 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
255 {
256 encoder.write_timestamp(&queries.set, queries.next_unused_query);
257 queries.next_unused_query += 1;
258 }
259
260 render_pass(
262 device,
263 &shader,
264 &mut encoder,
265 &queries.set,
266 &mut queries.next_unused_query,
267 );
268
269 compute_pass(
271 device,
272 &shader,
273 &mut encoder,
274 &queries.set,
275 &mut queries.next_unused_query,
276 );
277
278 if device
279 .features()
280 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
281 {
282 encoder.write_timestamp(&queries.set, queries.next_unused_query);
283 queries.next_unused_query += 1;
284 }
285
286 queries.resolve(&mut encoder);
287 queue.submit(Some(encoder.finish()));
288
289 queries
290}
291
292fn compute_pass(
293 device: &wgpu::Device,
294 module: &wgpu::ShaderModule,
295 encoder: &mut wgpu::CommandEncoder,
296 query_set: &wgpu::QuerySet,
297 next_unused_query: &mut u32,
298) {
299 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
300 label: Some("Storage Buffer"),
301 contents: bytemuck::cast_slice(&[42]),
302 usage: wgpu::BufferUsages::STORAGE,
303 });
304 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
305 label: None,
306 layout: None,
307 module,
308 entry_point: Some("main_cs"),
309 compilation_options: Default::default(),
310 cache: None,
311 });
312 let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
313 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
314 label: None,
315 layout: &bind_group_layout,
316 entries: &[wgpu::BindGroupEntry {
317 binding: 0,
318 resource: storage_buffer.as_entire_binding(),
319 }],
320 });
321
322 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
323 label: None,
324 timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
325 query_set,
326 beginning_of_pass_write_index: Some(*next_unused_query),
327 end_of_pass_write_index: Some(*next_unused_query + 1),
328 }),
329 });
330 *next_unused_query += 2;
331 cpass.set_pipeline(&compute_pipeline);
332 cpass.set_bind_group(0, &bind_group, &[]);
333 cpass.dispatch_workgroups(1, 1, 1);
334 if device
335 .features()
336 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
337 {
338 cpass.write_timestamp(query_set, *next_unused_query);
339 *next_unused_query += 1;
340 }
341 cpass.dispatch_workgroups(1, 1, 1);
342}
343
344fn render_pass(
345 device: &wgpu::Device,
346 module: &wgpu::ShaderModule,
347 encoder: &mut wgpu::CommandEncoder,
348 query_set: &wgpu::QuerySet,
349 next_unused_query: &mut u32,
350) {
351 let format = wgpu::TextureFormat::Rgba8Unorm;
352
353 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
354 label: None,
355 bind_group_layouts: &[],
356 immediate_size: 0,
357 });
358
359 let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
360 label: None,
361 layout: Some(&pipeline_layout),
362 vertex: wgpu::VertexState {
363 module,
364 entry_point: Some("vs_main"),
365 compilation_options: Default::default(),
366 buffers: &[],
367 },
368 fragment: Some(wgpu::FragmentState {
369 module,
370 entry_point: Some("fs_main"),
371 compilation_options: Default::default(),
372 targets: &[Some(format.into())],
373 }),
374 primitive: wgpu::PrimitiveState::default(),
375 depth_stencil: None,
376 multisample: wgpu::MultisampleState::default(),
377 multiview_mask: None,
378 cache: None,
379 });
380 let render_target = device.create_texture(&wgpu::TextureDescriptor {
381 label: Some("rendertarget"),
382 size: wgpu::Extent3d {
383 width: 512,
384 height: 512,
385 depth_or_array_layers: 1,
386 },
387 mip_level_count: 1,
388 sample_count: 1,
389 dimension: wgpu::TextureDimension::D2,
390 format,
391 usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
392 view_formats: &[format],
393 });
394 let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
395
396 let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
397 label: None,
398 color_attachments: &[Some(wgpu::RenderPassColorAttachment {
399 view: &render_target_view,
400 depth_slice: None,
401 resolve_target: None,
402 ops: wgpu::Operations {
403 load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
404 store: wgpu::StoreOp::Store,
405 },
406 })],
407 depth_stencil_attachment: None,
408 timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
409 query_set,
410 beginning_of_pass_write_index: Some(*next_unused_query),
411 end_of_pass_write_index: Some(*next_unused_query + 1),
412 }),
413 occlusion_query_set: None,
414 multiview_mask: None,
415 });
416 *next_unused_query += 2;
417
418 rpass.set_pipeline(&render_pipeline);
419
420 rpass.draw(0..3, 0..1);
421 if device
422 .features()
423 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
424 {
425 rpass.write_timestamp(query_set, *next_unused_query);
426 *next_unused_query += 1;
427 }
428
429 rpass.draw(0..3, 0..1);
430}
431
432pub fn main() {
433 #[cfg(not(target_arch = "wasm32"))]
434 {
435 env_logger::init();
436 pollster::block_on(run());
437 }
438 #[cfg(target_arch = "wasm32")]
439 {
440 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
441 console_log::init().expect("could not initialize logger");
442 wasm_bindgen_futures::spawn_local(run());
443 }
444}
445
/// GPU tests for the timestamp query example.
#[cfg(test)]
pub mod tests {
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    /// Timestamps at pass boundaries only.
    #[gpu_test]
    pub static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    /// Pass-boundary timestamps plus timestamps written on the encoder.
    /// Registered as a flaky expected failure on the "unexpected timestamp" panic.
    #[gpu_test]
    pub static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    /// Encoder, pass-boundary and in-pass timestamps together.
    /// Registered as a flaky expected failure on the "unexpected timestamp" panic.
    #[gpu_test]
    pub static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Submits the example workload and sanity-checks the timestamps read back.
    ///
    /// Every failed check panics with "unexpected timestamp", the message the
    /// expected-failure cases above match on.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let is_metal = ctx.adapter.get_info().backend == wgpu::Backend::Metal;
        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let raw_results = queries.wait_for_results(&ctx.device, is_metal);
        let QueryResults {
            encoder_timestamps,
            render_start_end_timestamps,
            render_inside_timestamp,
            compute_start_end_timestamps,
            compute_inside_timestamp,
        } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);

        // Wrapping subtraction throughout: an end value below its start yields a
        // huge delta instead of a subtraction panic.
        let render_delta =
            render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
        let compute_delta =
            compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
        let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0, "unexpected timestamp");
            // The two pass deltas can each be close to `u64::MAX`, so a plain `+`
            // could overflow: that panics with an unrelated message in debug builds
            // and wraps in release builds. An overflowing sum is larger than any
            // `u64`, so it is reported as a failed check.
            let passes_delta = render_delta.checked_add(compute_delta);
            assert!(
                passes_delta.is_some_and(|passes_delta| encoder_delta >= passes_delta),
                "unexpected timestamp"
            );
        }
        if let Some(render_inside_timestamp) = render_inside_timestamp {
            assert!(
                render_inside_timestamp >= render_start_end_timestamps[0],
                "unexpected timestamp"
            );
            assert!(
                render_inside_timestamp <= render_start_end_timestamps[1],
                "unexpected timestamp"
            );
        }
        if let Some(compute_inside_timestamp) = compute_inside_timestamp {
            assert!(
                compute_inside_timestamp >= compute_start_end_timestamps[0],
                "unexpected timestamp"
            );
            assert!(
                compute_inside_timestamp <= compute_start_end_timestamps[1],
                "unexpected timestamp"
            );
        }
    }
}