1use wgpu::util::DeviceExt;
21
22struct Queries {
23 set: wgpu::QuerySet,
24 resolve_buffer: wgpu::Buffer,
25 destination_buffer: wgpu::Buffer,
26 num_queries: u64,
27 next_unused_query: u32,
28}
29
/// Raw timestamp values read back from the GPU, grouped by where they were recorded.
///
/// The values are raw ticks; `QueryResults::print` converts differences to microseconds
/// using the queue's timestamp period.
#[derive(Debug, Clone, PartialEq, Eq)]
struct QueryResults {
    /// Timestamps taken on the command encoder: `[before render pass, after compute pass]`.
    encoder_timestamps: [u64; 2],
    /// Timestamps written at the `[beginning, end]` of the render pass.
    render_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two draws of the render pass, if recorded.
    render_inside_timestamp: Option<u64>,
    /// Timestamps written at the `[beginning, end]` of the compute pass.
    compute_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two dispatches of the compute pass, if recorded.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39 const NUM_QUERIES: u64 = 8;
49
50 #[expect(
51 clippy::redundant_closure,
52 reason = "false positive for `get_next_slot`, which needs to be used by reference"
53 )]
54 fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55 assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57 let mut next_slot = 0;
58 let mut get_next_slot = || {
59 let slot = timestamps[next_slot];
60 next_slot += 1;
61 slot
62 };
63
64 let mut encoder_timestamps = [0, 0];
65 encoder_timestamps[0] = get_next_slot();
66 let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67 let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68 let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69 let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70 encoder_timestamps[1] = get_next_slot();
71
72 QueryResults {
73 encoder_timestamps,
74 render_start_end_timestamps,
75 render_inside_timestamp,
76 compute_start_end_timestamps,
77 compute_inside_timestamp,
78 }
79 }
80
81 fn print(&self, queue: &wgpu::Queue) {
82 let period = queue.get_timestamp_period();
83 let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85 println!(
86 "Elapsed time before render until after compute: {:.2} μs",
87 elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88 );
89 println!(
90 "Elapsed time render pass: {:.2} μs",
91 elapsed_us(
92 self.render_start_end_timestamps[0],
93 self.render_start_end_timestamps[1]
94 )
95 );
96 if let Some(timestamp) = self.render_inside_timestamp {
97 println!(
98 "Elapsed time first triangle: {:.2} μs",
99 elapsed_us(self.render_start_end_timestamps[0], timestamp)
100 );
101 }
102 println!(
103 "Elapsed time compute pass: {:.2} μs",
104 elapsed_us(
105 self.compute_start_end_timestamps[0],
106 self.compute_start_end_timestamps[1]
107 )
108 );
109 if let Some(timestamp) = self.compute_inside_timestamp {
110 println!(
111 "Elapsed time after first dispatch: {:.2} μs",
112 elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113 );
114 }
115 }
116}
117
118impl Queries {
119 fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120 Queries {
121 set: device.create_query_set(&wgpu::QuerySetDescriptor {
122 label: Some("Timestamp query set"),
123 count: num_queries as _,
124 ty: wgpu::QueryType::Timestamp,
125 }),
126 resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127 label: Some("query resolve buffer"),
128 size: size_of::<u64>() as u64 * num_queries,
129 usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130 mapped_at_creation: false,
131 }),
132 destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133 label: Some("query dest buffer"),
134 size: size_of::<u64>() as u64 * num_queries,
135 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136 mapped_at_creation: false,
137 }),
138 num_queries,
139 next_unused_query: 0,
140 }
141 }
142
143 fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144 encoder.resolve_query_set(
145 &self.set,
146 0..self.next_unused_query,
148 &self.resolve_buffer,
149 0,
150 );
151 encoder.copy_buffer_to_buffer(
152 &self.resolve_buffer,
153 0,
154 &self.destination_buffer,
155 0,
156 self.resolve_buffer.size(),
157 );
158 }
159
160 fn wait_for_results(&self, device: &wgpu::Device, is_test_on_metal: bool) -> Vec<u64> {
161 self.destination_buffer
162 .slice(..)
163 .map_async(wgpu::MapMode::Read, |_| ());
164 let poll_type = if is_test_on_metal {
165 wgpu::PollType::Wait {
172 submission_index: None,
173 timeout: Some(std::time::Duration::from_secs(5)),
174 }
175 } else {
176 wgpu::PollType::wait_indefinitely()
177 };
178 device.poll(poll_type).unwrap();
179
180 let timestamps = {
181 let timestamp_view = self
182 .destination_buffer
183 .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
184 .get_mapped_range();
185 bytemuck::cast_slice(×tamp_view).to_vec()
186 };
187
188 self.destination_buffer.unmap();
189
190 timestamps
191 }
192}
193
194async fn run() {
195 let instance =
197 wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
198
199 let adapter = instance
201 .request_adapter(&wgpu::RequestAdapterOptions::default())
202 .await
203 .expect("Failed to request adapter.");
204
205 let features = adapter.features()
207 & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
208 if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
209 println!("Adapter supports timestamp queries.");
210 } else {
211 println!("Adapter does not support timestamp queries, aborting.");
212 return;
213 }
214 let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
215 if timestamps_inside_passes {
216 println!("Adapter supports timestamp queries within passes.");
217 } else {
218 println!("Adapter does not support timestamp queries within passes.");
219 }
220
221 let (device, queue) = adapter
224 .request_device(&wgpu::DeviceDescriptor {
225 label: None,
226 required_features: features,
227 required_limits: wgpu::Limits::downlevel_defaults(),
228 experimental_features: wgpu::ExperimentalFeatures::disabled(),
229 memory_hints: wgpu::MemoryHints::MemoryUsage,
230 trace: wgpu::Trace::Off,
231 })
232 .await
233 .unwrap();
234
235 let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
236 let raw_results = queries.wait_for_results(&device, false);
237 println!("Raw timestamp buffer contents: {raw_results:?}");
238 QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
239}
240
241fn submit_render_and_compute_pass_with_queries(
242 device: &wgpu::Device,
243 queue: &wgpu::Queue,
244) -> Queries {
245 let mut encoder =
246 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
247
248 let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
249 let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
250
251 if device
252 .features()
253 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
254 {
255 encoder.write_timestamp(&queries.set, queries.next_unused_query);
256 queries.next_unused_query += 1;
257 }
258
259 render_pass(
261 device,
262 &shader,
263 &mut encoder,
264 &queries.set,
265 &mut queries.next_unused_query,
266 );
267
268 compute_pass(
270 device,
271 &shader,
272 &mut encoder,
273 &queries.set,
274 &mut queries.next_unused_query,
275 );
276
277 if device
278 .features()
279 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
280 {
281 encoder.write_timestamp(&queries.set, queries.next_unused_query);
282 queries.next_unused_query += 1;
283 }
284
285 queries.resolve(&mut encoder);
286 queue.submit(Some(encoder.finish()));
287
288 queries
289}
290
291fn compute_pass(
292 device: &wgpu::Device,
293 module: &wgpu::ShaderModule,
294 encoder: &mut wgpu::CommandEncoder,
295 query_set: &wgpu::QuerySet,
296 next_unused_query: &mut u32,
297) {
298 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
299 label: Some("Storage Buffer"),
300 contents: bytemuck::cast_slice(&[42]),
301 usage: wgpu::BufferUsages::STORAGE,
302 });
303 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
304 label: None,
305 layout: None,
306 module,
307 entry_point: Some("main_cs"),
308 compilation_options: Default::default(),
309 cache: None,
310 });
311 let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
312 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
313 label: None,
314 layout: &bind_group_layout,
315 entries: &[wgpu::BindGroupEntry {
316 binding: 0,
317 resource: storage_buffer.as_entire_binding(),
318 }],
319 });
320
321 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
322 label: None,
323 timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
324 query_set,
325 beginning_of_pass_write_index: Some(*next_unused_query),
326 end_of_pass_write_index: Some(*next_unused_query + 1),
327 }),
328 });
329 *next_unused_query += 2;
330 cpass.set_pipeline(&compute_pipeline);
331 cpass.set_bind_group(0, &bind_group, &[]);
332 cpass.dispatch_workgroups(1, 1, 1);
333 if device
334 .features()
335 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
336 {
337 cpass.write_timestamp(query_set, *next_unused_query);
338 *next_unused_query += 1;
339 }
340 cpass.dispatch_workgroups(1, 1, 1);
341}
342
343fn render_pass(
344 device: &wgpu::Device,
345 module: &wgpu::ShaderModule,
346 encoder: &mut wgpu::CommandEncoder,
347 query_set: &wgpu::QuerySet,
348 next_unused_query: &mut u32,
349) {
350 let format = wgpu::TextureFormat::Rgba8Unorm;
351
352 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
353 label: None,
354 bind_group_layouts: &[],
355 immediate_size: 0,
356 });
357
358 let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
359 label: None,
360 layout: Some(&pipeline_layout),
361 vertex: wgpu::VertexState {
362 module,
363 entry_point: Some("vs_main"),
364 compilation_options: Default::default(),
365 buffers: &[],
366 },
367 fragment: Some(wgpu::FragmentState {
368 module,
369 entry_point: Some("fs_main"),
370 compilation_options: Default::default(),
371 targets: &[Some(format.into())],
372 }),
373 primitive: wgpu::PrimitiveState::default(),
374 depth_stencil: None,
375 multisample: wgpu::MultisampleState::default(),
376 multiview_mask: None,
377 cache: None,
378 });
379 let render_target = device.create_texture(&wgpu::TextureDescriptor {
380 label: Some("rendertarget"),
381 size: wgpu::Extent3d {
382 width: 512,
383 height: 512,
384 depth_or_array_layers: 1,
385 },
386 mip_level_count: 1,
387 sample_count: 1,
388 dimension: wgpu::TextureDimension::D2,
389 format,
390 usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
391 view_formats: &[format],
392 });
393 let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
394
395 let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
396 label: None,
397 color_attachments: &[Some(wgpu::RenderPassColorAttachment {
398 view: &render_target_view,
399 depth_slice: None,
400 resolve_target: None,
401 ops: wgpu::Operations {
402 load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
403 store: wgpu::StoreOp::Store,
404 },
405 })],
406 depth_stencil_attachment: None,
407 timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
408 query_set,
409 beginning_of_pass_write_index: Some(*next_unused_query),
410 end_of_pass_write_index: Some(*next_unused_query + 1),
411 }),
412 occlusion_query_set: None,
413 multiview_mask: None,
414 });
415 *next_unused_query += 2;
416
417 rpass.set_pipeline(&render_pipeline);
418
419 rpass.draw(0..3, 0..1);
420 if device
421 .features()
422 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
423 {
424 rpass.write_timestamp(query_set, *next_unused_query);
425 *next_unused_query += 1;
426 }
427
428 rpass.draw(0..3, 0..1);
429}
430
431pub fn main() {
432 #[cfg(not(target_arch = "wasm32"))]
433 {
434 env_logger::init();
435 pollster::block_on(run());
436 }
437 #[cfg(target_arch = "wasm32")]
438 {
439 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
440 console_log::init().expect("could not initialize logger");
441 wasm_bindgen_futures::spawn_local(run());
442 }
443}
444
#[cfg(test)]
pub mod tests {
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    // Only pass-boundary timestamps: requires just `TIMESTAMP_QUERY`.
    #[gpu_test]
    pub static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    // Pass-boundary plus encoder timestamps. Registered as a flaky expected failure on the
    // "unexpected timestamp" panic.
    #[gpu_test]
    pub static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    // Pass-boundary, encoder and inside-pass timestamps. Registered as a flaky expected
    // failure on the "unexpected timestamp" panic.
    #[gpu_test]
    pub static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Submits the instrumented passes and sanity-checks the timestamps read back.
    ///
    /// * `timestamps_on_encoder` — also check that the encoder-level interval is non-zero
    ///   and at least as long as the render and compute intervals combined.
    /// * `timestamps_inside_passes` — parse the in-between timestamps and check that each
    ///   lies within its pass's begin/end timestamps.
    ///
    /// Every assertion panics with "unexpected timestamp", which the test
    /// configurations above match on.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let backend = ctx.adapter.get_info().backend;
        let is_metal = backend == wgpu::Backend::Metal;

        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let raw_results = queries.wait_for_results(&ctx.device, is_metal);
        let results = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);

        // Intervals use wrapping subtraction because raw timestamps may wrap around.
        let interval = |[begin, end]: [u64; 2]| end.wrapping_sub(begin);
        let render_delta = interval(results.render_start_end_timestamps);
        let compute_delta = interval(results.compute_start_end_timestamps);
        let encoder_delta = interval(results.encoder_timestamps);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0, "unexpected timestamp");
            assert!(
                encoder_delta >= render_delta + compute_delta,
                "unexpected timestamp"
            );
        }

        // Render pass first, then compute pass.
        let inside_pass_checks = [
            (
                results.render_inside_timestamp,
                results.render_start_end_timestamps,
            ),
            (
                results.compute_inside_timestamp,
                results.compute_start_end_timestamps,
            ),
        ];
        for (inside, [begin, end]) in inside_pass_checks {
            let Some(inside) = inside else {
                continue;
            };
            assert!(inside >= begin, "unexpected timestamp");
            assert!(inside <= end, "unexpected timestamp");
        }
    }
}