1use wgpu::util::DeviceExt;
21
/// GPU resources used to record timestamp queries and read them back on the CPU.
struct Queries {
    /// Query set the timestamps are written into (created with `QueryType::Timestamp`).
    set: wgpu::QuerySet,
    /// Buffer the query set is resolved into (`QUERY_RESOLVE | COPY_SRC` usage).
    resolve_buffer: wgpu::Buffer,
    /// Mappable buffer the resolved values are copied into (`COPY_DST | MAP_READ` usage).
    destination_buffer: wgpu::Buffer,
    /// Number of entries in `set`; both buffers are sized to hold this many `u64` values.
    num_queries: u64,
    /// Index of the next query in `set` that has not been written yet.
    next_unused_query: u32,
}
29
/// Timestamp values read back from the GPU, grouped by where they were recorded.
///
/// Values are raw ticks; `print` scales them by the queue's timestamp period.
struct QueryResults {
    /// Values decoded for the start and end of the command encoder.
    encoder_timestamps: [u64; 2],
    /// Values decoded for the beginning and end of the render pass.
    render_start_end_timestamps: [u64; 2],
    /// Value written inside the render pass; `None` when inside-pass timestamps were not decoded.
    render_inside_timestamp: Option<u64>,
    /// Values decoded for the beginning and end of the compute pass.
    compute_start_end_timestamps: [u64; 2],
    /// Value written inside the compute pass; `None` when inside-pass timestamps were not decoded.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39 const NUM_QUERIES: u64 = 8;
49
50 #[expect(
51 clippy::redundant_closure,
52 reason = "false positive for `get_next_slot`, which needs to be used by reference"
53 )]
54 fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55 assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57 let mut next_slot = 0;
58 let mut get_next_slot = || {
59 let slot = timestamps[next_slot];
60 next_slot += 1;
61 slot
62 };
63
64 let mut encoder_timestamps = [0, 0];
65 encoder_timestamps[0] = get_next_slot();
66 let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67 let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68 let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69 let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70 encoder_timestamps[1] = get_next_slot();
71
72 QueryResults {
73 encoder_timestamps,
74 render_start_end_timestamps,
75 render_inside_timestamp,
76 compute_start_end_timestamps,
77 compute_inside_timestamp,
78 }
79 }
80
81 fn print(&self, queue: &wgpu::Queue) {
82 let period = queue.get_timestamp_period();
83 let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85 println!(
86 "Elapsed time before render until after compute: {:.2} μs",
87 elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88 );
89 println!(
90 "Elapsed time render pass: {:.2} μs",
91 elapsed_us(
92 self.render_start_end_timestamps[0],
93 self.render_start_end_timestamps[1]
94 )
95 );
96 if let Some(timestamp) = self.render_inside_timestamp {
97 println!(
98 "Elapsed time first triangle: {:.2} μs",
99 elapsed_us(self.render_start_end_timestamps[0], timestamp)
100 );
101 }
102 println!(
103 "Elapsed time compute pass: {:.2} μs",
104 elapsed_us(
105 self.compute_start_end_timestamps[0],
106 self.compute_start_end_timestamps[1]
107 )
108 );
109 if let Some(timestamp) = self.compute_inside_timestamp {
110 println!(
111 "Elapsed time after first dispatch: {:.2} μs",
112 elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113 );
114 }
115 }
116}
117
118impl Queries {
119 fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120 Queries {
121 set: device.create_query_set(&wgpu::QuerySetDescriptor {
122 label: Some("Timestamp query set"),
123 count: num_queries as _,
124 ty: wgpu::QueryType::Timestamp,
125 }),
126 resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127 label: Some("query resolve buffer"),
128 size: size_of::<u64>() as u64 * num_queries,
129 usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130 mapped_at_creation: false,
131 }),
132 destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133 label: Some("query dest buffer"),
134 size: size_of::<u64>() as u64 * num_queries,
135 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136 mapped_at_creation: false,
137 }),
138 num_queries,
139 next_unused_query: 0,
140 }
141 }
142
143 fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144 encoder.resolve_query_set(
145 &self.set,
146 0..self.next_unused_query,
148 &self.resolve_buffer,
149 0,
150 );
151 encoder.copy_buffer_to_buffer(
152 &self.resolve_buffer,
153 0,
154 &self.destination_buffer,
155 0,
156 self.resolve_buffer.size(),
157 );
158 }
159
160 fn wait_for_results(&self, device: &wgpu::Device) -> Vec<u64> {
161 self.destination_buffer
162 .slice(..)
163 .map_async(wgpu::MapMode::Read, |_| ());
164 device.poll(wgpu::PollType::wait()).unwrap();
165
166 let timestamps = {
167 let timestamp_view = self
168 .destination_buffer
169 .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
170 .get_mapped_range();
171 bytemuck::cast_slice(×tamp_view).to_vec()
172 };
173
174 self.destination_buffer.unmap();
175
176 timestamps
177 }
178}
179
180async fn run() {
181 let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::from_env_or_default());
183
184 let adapter = instance
186 .request_adapter(&wgpu::RequestAdapterOptions::default())
187 .await
188 .expect("Failed to request adapter.");
189
190 let features = adapter.features()
192 & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
193 if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
194 println!("Adapter supports timestamp queries.");
195 } else {
196 println!("Adapter does not support timestamp queries, aborting.");
197 return;
198 }
199 let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
200 if timestamps_inside_passes {
201 println!("Adapter supports timestamp queries within passes.");
202 } else {
203 println!("Adapter does not support timestamp queries within passes.");
204 }
205
206 let (device, queue) = adapter
209 .request_device(&wgpu::DeviceDescriptor {
210 label: None,
211 required_features: features,
212 required_limits: wgpu::Limits::downlevel_defaults(),
213 memory_hints: wgpu::MemoryHints::MemoryUsage,
214 trace: wgpu::Trace::Off,
215 })
216 .await
217 .unwrap();
218
219 let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
220 let raw_results = queries.wait_for_results(&device);
221 println!("Raw timestamp buffer contents: {raw_results:?}");
222 QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
223}
224
225fn submit_render_and_compute_pass_with_queries(
226 device: &wgpu::Device,
227 queue: &wgpu::Queue,
228) -> Queries {
229 let mut encoder =
230 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
231
232 let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
233 let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
234
235 if device
236 .features()
237 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
238 {
239 encoder.write_timestamp(&queries.set, queries.next_unused_query);
240 queries.next_unused_query += 1;
241 }
242
243 render_pass(
245 device,
246 &shader,
247 &mut encoder,
248 &queries.set,
249 &mut queries.next_unused_query,
250 );
251
252 compute_pass(
254 device,
255 &shader,
256 &mut encoder,
257 &queries.set,
258 &mut queries.next_unused_query,
259 );
260
261 if device
262 .features()
263 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
264 {
265 encoder.write_timestamp(&queries.set, queries.next_unused_query);
266 queries.next_unused_query += 1;
267 }
268
269 queries.resolve(&mut encoder);
270 queue.submit(Some(encoder.finish()));
271
272 queries
273}
274
275fn compute_pass(
276 device: &wgpu::Device,
277 module: &wgpu::ShaderModule,
278 encoder: &mut wgpu::CommandEncoder,
279 query_set: &wgpu::QuerySet,
280 next_unused_query: &mut u32,
281) {
282 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
283 label: Some("Storage Buffer"),
284 contents: bytemuck::cast_slice(&[42]),
285 usage: wgpu::BufferUsages::STORAGE,
286 });
287 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
288 label: None,
289 layout: None,
290 module,
291 entry_point: Some("main_cs"),
292 compilation_options: Default::default(),
293 cache: None,
294 });
295 let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
296 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
297 label: None,
298 layout: &bind_group_layout,
299 entries: &[wgpu::BindGroupEntry {
300 binding: 0,
301 resource: storage_buffer.as_entire_binding(),
302 }],
303 });
304
305 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
306 label: None,
307 timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
308 query_set,
309 beginning_of_pass_write_index: Some(*next_unused_query),
310 end_of_pass_write_index: Some(*next_unused_query + 1),
311 }),
312 });
313 *next_unused_query += 2;
314 cpass.set_pipeline(&compute_pipeline);
315 cpass.set_bind_group(0, &bind_group, &[]);
316 cpass.dispatch_workgroups(1, 1, 1);
317 if device
318 .features()
319 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
320 {
321 cpass.write_timestamp(query_set, *next_unused_query);
322 *next_unused_query += 1;
323 }
324 cpass.dispatch_workgroups(1, 1, 1);
325}
326
327fn render_pass(
328 device: &wgpu::Device,
329 module: &wgpu::ShaderModule,
330 encoder: &mut wgpu::CommandEncoder,
331 query_set: &wgpu::QuerySet,
332 next_unused_query: &mut u32,
333) {
334 let format = wgpu::TextureFormat::Rgba8Unorm;
335
336 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
337 label: None,
338 bind_group_layouts: &[],
339 push_constant_ranges: &[],
340 });
341
342 let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
343 label: None,
344 layout: Some(&pipeline_layout),
345 vertex: wgpu::VertexState {
346 module,
347 entry_point: Some("vs_main"),
348 compilation_options: Default::default(),
349 buffers: &[],
350 },
351 fragment: Some(wgpu::FragmentState {
352 module,
353 entry_point: Some("fs_main"),
354 compilation_options: Default::default(),
355 targets: &[Some(format.into())],
356 }),
357 primitive: wgpu::PrimitiveState::default(),
358 depth_stencil: None,
359 multisample: wgpu::MultisampleState::default(),
360 multiview: None,
361 cache: None,
362 });
363 let render_target = device.create_texture(&wgpu::TextureDescriptor {
364 label: Some("rendertarget"),
365 size: wgpu::Extent3d {
366 width: 512,
367 height: 512,
368 depth_or_array_layers: 1,
369 },
370 mip_level_count: 1,
371 sample_count: 1,
372 dimension: wgpu::TextureDimension::D2,
373 format,
374 usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
375 view_formats: &[format],
376 });
377 let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
378
379 let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
380 label: None,
381 color_attachments: &[Some(wgpu::RenderPassColorAttachment {
382 view: &render_target_view,
383 depth_slice: None,
384 resolve_target: None,
385 ops: wgpu::Operations {
386 load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
387 store: wgpu::StoreOp::Store,
388 },
389 })],
390 depth_stencil_attachment: None,
391 timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
392 query_set,
393 beginning_of_pass_write_index: Some(*next_unused_query),
394 end_of_pass_write_index: Some(*next_unused_query + 1),
395 }),
396 occlusion_query_set: None,
397 });
398 *next_unused_query += 2;
399
400 rpass.set_pipeline(&render_pipeline);
401
402 rpass.draw(0..3, 0..1);
403 if device
404 .features()
405 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
406 {
407 rpass.write_timestamp(query_set, *next_unused_query);
408 *next_unused_query += 1;
409 }
410
411 rpass.draw(0..3, 0..1);
412}
413
414pub fn main() {
415 #[cfg(not(target_arch = "wasm32"))]
416 {
417 env_logger::init();
418 pollster::block_on(run());
419 }
420 #[cfg(target_arch = "wasm32")]
421 {
422 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
423 console_log::init().expect("could not initialize logger");
424 wasm_bindgen_futures::spawn_local(run());
425 }
426}
427
#[cfg(test)]
mod tests {
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    // Only pass-boundary timestamps: no encoder or inside-pass checks are made.
    #[gpu_test]
    static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    // Pass boundaries plus encoder timestamps; tolerated as a flaky failure.
    #[gpu_test]
    static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    // All timestamp kinds, including those inside passes; tolerated as a flaky failure.
    #[gpu_test]
    static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                .expect_fail(FailureCase::always().panic("unexpected timestamp").flaky()),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Runs the timestamped passes and checks the decoded values for consistency.
    ///
    /// With `timestamps_on_encoder`, the encoder interval must be non-zero and
    /// at least as long as both passes combined. Inside-pass timestamps, when
    /// decoded, must lie within their pass's begin/end values.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let results = QueryResults::from_raw_results(
            queries.wait_for_results(&ctx.device),
            timestamps_inside_passes,
        );

        // Wrapping difference between the end and start of an interval.
        let span = |[start, end]: [u64; 2]| end.wrapping_sub(start);
        // An inside-pass timestamp, if present, must fall within the pass interval.
        let assert_within = |inside: Option<u64>, [start, end]: [u64; 2]| {
            if let Some(inside) = inside {
                assert!(inside >= start, "unexpected timestamp");
                assert!(inside <= end, "unexpected timestamp");
            }
        };

        let render_delta = span(results.render_start_end_timestamps);
        let compute_delta = span(results.compute_start_end_timestamps);
        let encoder_delta = span(results.encoder_timestamps);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0, "unexpected timestamp");
            assert!(
                encoder_delta >= render_delta + compute_delta,
                "unexpected timestamp"
            );
        }
        assert_within(
            results.render_inside_timestamp,
            results.render_start_end_timestamps,
        );
        assert_within(
            results.compute_inside_timestamp,
            results.compute_start_end_timestamps,
        );
    }
}