1use wgpu::util::DeviceExt;
21
22struct Queries {
23 set: wgpu::QuerySet,
24 resolve_buffer: wgpu::Buffer,
25 destination_buffer: wgpu::Buffer,
26 num_queries: u64,
27 next_unused_query: u32,
28}
29
/// Timestamps read back from the GPU, grouped by the scope they were recorded in.
///
/// All values are raw ticks; see `QueryResults::print` for the conversion to microseconds.
struct QueryResults {
    /// `[start, end]` written on the command encoder around both passes.
    encoder_timestamps: [u64; 2],
    /// `[start, end]` written at the boundaries of the render pass.
    render_start_end_timestamps: [u64; 2],
    /// Timestamp written inside the render pass, if that was requested.
    render_inside_timestamp: Option<u64>,
    /// `[start, end]` written at the boundaries of the compute pass.
    compute_start_end_timestamps: [u64; 2],
    /// Timestamp written inside the compute pass, if that was requested.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39 const NUM_QUERIES: u64 = 8;
49
50 #[expect(
51 clippy::redundant_closure,
52 reason = "false positive for `get_next_slot`, which needs to be used by reference"
53 )]
54 fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55 assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57 let mut next_slot = 0;
58 let mut get_next_slot = || {
59 let slot = timestamps[next_slot];
60 next_slot += 1;
61 slot
62 };
63
64 let mut encoder_timestamps = [0, 0];
65 encoder_timestamps[0] = get_next_slot();
66 let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67 let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68 let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69 let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70 encoder_timestamps[1] = get_next_slot();
71
72 QueryResults {
73 encoder_timestamps,
74 render_start_end_timestamps,
75 render_inside_timestamp,
76 compute_start_end_timestamps,
77 compute_inside_timestamp,
78 }
79 }
80
81 fn print(&self, queue: &wgpu::Queue) {
82 let period = queue.get_timestamp_period();
83 let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85 println!(
86 "Elapsed time before render until after compute: {:.2} μs",
87 elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88 );
89 println!(
90 "Elapsed time render pass: {:.2} μs",
91 elapsed_us(
92 self.render_start_end_timestamps[0],
93 self.render_start_end_timestamps[1]
94 )
95 );
96 if let Some(timestamp) = self.render_inside_timestamp {
97 println!(
98 "Elapsed time first triangle: {:.2} μs",
99 elapsed_us(self.render_start_end_timestamps[0], timestamp)
100 );
101 }
102 println!(
103 "Elapsed time compute pass: {:.2} μs",
104 elapsed_us(
105 self.compute_start_end_timestamps[0],
106 self.compute_start_end_timestamps[1]
107 )
108 );
109 if let Some(timestamp) = self.compute_inside_timestamp {
110 println!(
111 "Elapsed time after first dispatch: {:.2} μs",
112 elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113 );
114 }
115 }
116}
117
118impl Queries {
119 fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120 Queries {
121 set: device.create_query_set(&wgpu::QuerySetDescriptor {
122 label: Some("Timestamp query set"),
123 count: num_queries as _,
124 ty: wgpu::QueryType::Timestamp,
125 }),
126 resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127 label: Some("query resolve buffer"),
128 size: size_of::<u64>() as u64 * num_queries,
129 usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130 mapped_at_creation: false,
131 }),
132 destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133 label: Some("query dest buffer"),
134 size: size_of::<u64>() as u64 * num_queries,
135 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136 mapped_at_creation: false,
137 }),
138 num_queries,
139 next_unused_query: 0,
140 }
141 }
142
143 fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144 encoder.resolve_query_set(
145 &self.set,
146 0..self.next_unused_query,
148 &self.resolve_buffer,
149 0,
150 );
151 encoder.copy_buffer_to_buffer(
152 &self.resolve_buffer,
153 0,
154 &self.destination_buffer,
155 0,
156 self.resolve_buffer.size(),
157 );
158 }
159
160 fn wait_for_results(&self, device: &wgpu::Device, is_test_on_metal: bool) -> Vec<u64> {
161 self.destination_buffer
162 .slice(..)
163 .map_async(wgpu::MapMode::Read, |_| ());
164 let poll_type = if is_test_on_metal {
165 wgpu::PollType::Wait {
172 submission_index: None,
173 timeout: Some(std::time::Duration::from_secs(5)),
174 }
175 } else {
176 wgpu::PollType::wait_indefinitely()
177 };
178 device.poll(poll_type).unwrap();
179
180 let timestamps = {
181 let timestamp_view = self
182 .destination_buffer
183 .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
184 .get_mapped_range();
185 bytemuck::cast_slice(×tamp_view).to_vec()
186 };
187
188 self.destination_buffer.unmap();
189
190 timestamps
191 }
192}
193
194async fn run() {
195 let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::from_env_or_default());
197
198 let adapter = instance
200 .request_adapter(&wgpu::RequestAdapterOptions::default())
201 .await
202 .expect("Failed to request adapter.");
203
204 let features = adapter.features()
206 & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
207 if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
208 println!("Adapter supports timestamp queries.");
209 } else {
210 println!("Adapter does not support timestamp queries, aborting.");
211 return;
212 }
213 let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
214 if timestamps_inside_passes {
215 println!("Adapter supports timestamp queries within passes.");
216 } else {
217 println!("Adapter does not support timestamp queries within passes.");
218 }
219
220 let (device, queue) = adapter
223 .request_device(&wgpu::DeviceDescriptor {
224 label: None,
225 required_features: features,
226 required_limits: wgpu::Limits::downlevel_defaults(),
227 experimental_features: wgpu::ExperimentalFeatures::disabled(),
228 memory_hints: wgpu::MemoryHints::MemoryUsage,
229 trace: wgpu::Trace::Off,
230 })
231 .await
232 .unwrap();
233
234 let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
235 let raw_results = queries.wait_for_results(&device, false);
236 println!("Raw timestamp buffer contents: {raw_results:?}");
237 QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
238}
239
240fn submit_render_and_compute_pass_with_queries(
241 device: &wgpu::Device,
242 queue: &wgpu::Queue,
243) -> Queries {
244 let mut encoder =
245 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
246
247 let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
248 let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
249
250 if device
251 .features()
252 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
253 {
254 encoder.write_timestamp(&queries.set, queries.next_unused_query);
255 queries.next_unused_query += 1;
256 }
257
258 render_pass(
260 device,
261 &shader,
262 &mut encoder,
263 &queries.set,
264 &mut queries.next_unused_query,
265 );
266
267 compute_pass(
269 device,
270 &shader,
271 &mut encoder,
272 &queries.set,
273 &mut queries.next_unused_query,
274 );
275
276 if device
277 .features()
278 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
279 {
280 encoder.write_timestamp(&queries.set, queries.next_unused_query);
281 queries.next_unused_query += 1;
282 }
283
284 queries.resolve(&mut encoder);
285 queue.submit(Some(encoder.finish()));
286
287 queries
288}
289
290fn compute_pass(
291 device: &wgpu::Device,
292 module: &wgpu::ShaderModule,
293 encoder: &mut wgpu::CommandEncoder,
294 query_set: &wgpu::QuerySet,
295 next_unused_query: &mut u32,
296) {
297 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
298 label: Some("Storage Buffer"),
299 contents: bytemuck::cast_slice(&[42]),
300 usage: wgpu::BufferUsages::STORAGE,
301 });
302 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
303 label: None,
304 layout: None,
305 module,
306 entry_point: Some("main_cs"),
307 compilation_options: Default::default(),
308 cache: None,
309 });
310 let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
311 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
312 label: None,
313 layout: &bind_group_layout,
314 entries: &[wgpu::BindGroupEntry {
315 binding: 0,
316 resource: storage_buffer.as_entire_binding(),
317 }],
318 });
319
320 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
321 label: None,
322 timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
323 query_set,
324 beginning_of_pass_write_index: Some(*next_unused_query),
325 end_of_pass_write_index: Some(*next_unused_query + 1),
326 }),
327 });
328 *next_unused_query += 2;
329 cpass.set_pipeline(&compute_pipeline);
330 cpass.set_bind_group(0, &bind_group, &[]);
331 cpass.dispatch_workgroups(1, 1, 1);
332 if device
333 .features()
334 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
335 {
336 cpass.write_timestamp(query_set, *next_unused_query);
337 *next_unused_query += 1;
338 }
339 cpass.dispatch_workgroups(1, 1, 1);
340}
341
342fn render_pass(
343 device: &wgpu::Device,
344 module: &wgpu::ShaderModule,
345 encoder: &mut wgpu::CommandEncoder,
346 query_set: &wgpu::QuerySet,
347 next_unused_query: &mut u32,
348) {
349 let format = wgpu::TextureFormat::Rgba8Unorm;
350
351 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
352 label: None,
353 bind_group_layouts: &[],
354 push_constant_ranges: &[],
355 });
356
357 let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
358 label: None,
359 layout: Some(&pipeline_layout),
360 vertex: wgpu::VertexState {
361 module,
362 entry_point: Some("vs_main"),
363 compilation_options: Default::default(),
364 buffers: &[],
365 },
366 fragment: Some(wgpu::FragmentState {
367 module,
368 entry_point: Some("fs_main"),
369 compilation_options: Default::default(),
370 targets: &[Some(format.into())],
371 }),
372 primitive: wgpu::PrimitiveState::default(),
373 depth_stencil: None,
374 multisample: wgpu::MultisampleState::default(),
375 multiview_mask: None,
376 cache: None,
377 });
378 let render_target = device.create_texture(&wgpu::TextureDescriptor {
379 label: Some("rendertarget"),
380 size: wgpu::Extent3d {
381 width: 512,
382 height: 512,
383 depth_or_array_layers: 1,
384 },
385 mip_level_count: 1,
386 sample_count: 1,
387 dimension: wgpu::TextureDimension::D2,
388 format,
389 usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
390 view_formats: &[format],
391 });
392 let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
393
394 let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
395 label: None,
396 color_attachments: &[Some(wgpu::RenderPassColorAttachment {
397 view: &render_target_view,
398 depth_slice: None,
399 resolve_target: None,
400 ops: wgpu::Operations {
401 load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
402 store: wgpu::StoreOp::Store,
403 },
404 })],
405 depth_stencil_attachment: None,
406 timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
407 query_set,
408 beginning_of_pass_write_index: Some(*next_unused_query),
409 end_of_pass_write_index: Some(*next_unused_query + 1),
410 }),
411 occlusion_query_set: None,
412 multiview_mask: None,
413 });
414 *next_unused_query += 2;
415
416 rpass.set_pipeline(&render_pipeline);
417
418 rpass.draw(0..3, 0..1);
419 if device
420 .features()
421 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
422 {
423 rpass.write_timestamp(query_set, *next_unused_query);
424 *next_unused_query += 1;
425 }
426
427 rpass.draw(0..3, 0..1);
428}
429
430pub fn main() {
431 #[cfg(not(target_arch = "wasm32"))]
432 {
433 env_logger::init();
434 pollster::block_on(run());
435 }
436 #[cfg(target_arch = "wasm32")]
437 {
438 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
439 console_log::init().expect("could not initialize logger");
440 wasm_bindgen_futures::spawn_local(run());
441 }
442}
443
#[cfg(test)]
pub mod tests {
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    /// Timestamps at pass boundaries only.
    #[gpu_test]
    pub static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    /// Pass-boundary timestamps plus timestamps written on the encoder.
    /// Failures with the "unexpected timestamp" panic are tolerated as flaky.
    #[gpu_test]
    pub static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    /// Pass-boundary, encoder and inside-pass timestamps.
    /// Failures with the "unexpected timestamp" panic are tolerated as flaky.
    #[gpu_test]
    pub static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Submits the example workload and sanity-checks the returned timestamps.
    ///
    /// * `timestamps_on_encoder` - whether the encoder-level interval is checked.
    /// * `timestamps_inside_passes` - whether in-between slots are parsed and checked.
    ///
    /// Every assertion uses the message "unexpected timestamp" so that the
    /// flaky-failure expectations declared on the test statics match it.
    ///
    /// NOTE(review): with `timestamps_on_encoder == false` no assertion on the
    /// parsed values runs, so that configuration only checks that submission
    /// and readback complete.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let is_metal = ctx.adapter.get_info().backend == wgpu::Backend::Metal;
        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let raw_results = queries.wait_for_results(&ctx.device, is_metal);
        let QueryResults {
            encoder_timestamps,
            render_start_end_timestamps,
            render_inside_timestamp,
            compute_start_end_timestamps,
            compute_inside_timestamp,
        } = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);

        // Wrapping subtraction: a bogus or wrapped timestamp yields a huge
        // delta instead of an arithmetic panic.
        let render_delta =
            render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
        let compute_delta =
            compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
        let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0, "unexpected timestamp");
            // Huge wrapped deltas can overflow when summed. Treat that as an
            // unexpected timestamp too, rather than panicking with an
            // unrelated overflow message (debug) or wrapping around (release).
            let passes_delta = render_delta.checked_add(compute_delta);
            assert!(
                passes_delta.is_some_and(|sum| encoder_delta >= sum),
                "unexpected timestamp"
            );
        }
        if let Some(render_inside_timestamp) = render_inside_timestamp {
            assert!(
                render_inside_timestamp >= render_start_end_timestamps[0],
                "unexpected timestamp"
            );
            assert!(
                render_inside_timestamp <= render_start_end_timestamps[1],
                "unexpected timestamp"
            );
        }
        if let Some(compute_inside_timestamp) = compute_inside_timestamp {
            assert!(
                compute_inside_timestamp >= compute_start_end_timestamps[0],
                "unexpected timestamp"
            );
            assert!(
                compute_inside_timestamp <= compute_start_end_timestamps[1],
                "unexpected timestamp"
            );
        }
    }
}