1use wgpu::util::DeviceExt;
21
/// GPU resources used to record timestamp queries and read them back on the CPU.
struct Queries {
    /// Query set the timestamps are written into.
    set: wgpu::QuerySet,
    /// Buffer the query set is resolved into; also the source of the readback copy.
    resolve_buffer: wgpu::Buffer,
    /// Mappable buffer that receives a copy of `resolve_buffer` for CPU readback.
    destination_buffer: wgpu::Buffer,
    /// Capacity of `set` (and of both buffers), counted in queries.
    num_queries: u64,
    /// Index of the next query slot that has not been handed out yet.
    next_unused_query: u32,
}
29
/// Timestamps decoded from the raw query buffer, grouped by where they were recorded.
struct QueryResults {
    /// Timestamps written on the command encoder: `[before render pass, after compute pass]`.
    encoder_timestamps: [u64; 2],
    /// `[beginning, end]` timestamps of the render pass.
    render_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two draws of the render pass, when decoded.
    render_inside_timestamp: Option<u64>,
    /// `[beginning, end]` timestamps of the compute pass.
    compute_start_end_timestamps: [u64; 2],
    /// Timestamp written between the two dispatches of the compute pass, when decoded.
    compute_inside_timestamp: Option<u64>,
}
37
38impl QueryResults {
39 const NUM_QUERIES: u64 = 8;
49
50 #[expect(
51 clippy::redundant_closure,
52 reason = "false positive for `get_next_slot`, which needs to be used by reference"
53 )]
54 fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
55 assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);
56
57 let mut next_slot = 0;
58 let mut get_next_slot = || {
59 let slot = timestamps[next_slot];
60 next_slot += 1;
61 slot
62 };
63
64 let mut encoder_timestamps = [0, 0];
65 encoder_timestamps[0] = get_next_slot();
66 let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
67 let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
68 let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
69 let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
70 encoder_timestamps[1] = get_next_slot();
71
72 QueryResults {
73 encoder_timestamps,
74 render_start_end_timestamps,
75 render_inside_timestamp,
76 compute_start_end_timestamps,
77 compute_inside_timestamp,
78 }
79 }
80
81 fn print(&self, queue: &wgpu::Queue) {
82 let period = queue.get_timestamp_period();
83 let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;
84
85 println!(
86 "Elapsed time before render until after compute: {:.2} μs",
87 elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
88 );
89 println!(
90 "Elapsed time render pass: {:.2} μs",
91 elapsed_us(
92 self.render_start_end_timestamps[0],
93 self.render_start_end_timestamps[1]
94 )
95 );
96 if let Some(timestamp) = self.render_inside_timestamp {
97 println!(
98 "Elapsed time first triangle: {:.2} μs",
99 elapsed_us(self.render_start_end_timestamps[0], timestamp)
100 );
101 }
102 println!(
103 "Elapsed time compute pass: {:.2} μs",
104 elapsed_us(
105 self.compute_start_end_timestamps[0],
106 self.compute_start_end_timestamps[1]
107 )
108 );
109 if let Some(timestamp) = self.compute_inside_timestamp {
110 println!(
111 "Elapsed time after first dispatch: {:.2} μs",
112 elapsed_us(self.compute_start_end_timestamps[0], timestamp)
113 );
114 }
115 }
116}
117
118impl Queries {
119 fn new(device: &wgpu::Device, num_queries: u64) -> Self {
120 Queries {
121 set: device.create_query_set(&wgpu::QuerySetDescriptor {
122 label: Some("Timestamp query set"),
123 count: num_queries as _,
124 ty: wgpu::QueryType::Timestamp,
125 }),
126 resolve_buffer: device.create_buffer(&wgpu::BufferDescriptor {
127 label: Some("query resolve buffer"),
128 size: size_of::<u64>() as u64 * num_queries,
129 usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::QUERY_RESOLVE,
130 mapped_at_creation: false,
131 }),
132 destination_buffer: device.create_buffer(&wgpu::BufferDescriptor {
133 label: Some("query dest buffer"),
134 size: size_of::<u64>() as u64 * num_queries,
135 usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
136 mapped_at_creation: false,
137 }),
138 num_queries,
139 next_unused_query: 0,
140 }
141 }
142
143 fn resolve(&self, encoder: &mut wgpu::CommandEncoder) {
144 encoder.resolve_query_set(
145 &self.set,
146 0..self.next_unused_query,
147 &self.resolve_buffer,
148 0,
149 );
150 encoder.copy_buffer_to_buffer(
151 &self.resolve_buffer,
152 0,
153 &self.destination_buffer,
154 0,
155 self.resolve_buffer.size(),
156 );
157 }
158
159 fn wait_for_results(&self, device: &wgpu::Device) -> Vec<u64> {
160 self.destination_buffer
161 .slice(..)
162 .map_async(wgpu::MapMode::Read, |_| ());
163 device.poll(wgpu::PollType::wait_indefinitely()).unwrap();
164
165 let timestamps = {
166 let timestamp_view = self
167 .destination_buffer
168 .slice(..(size_of::<u64>() as wgpu::BufferAddress * self.num_queries))
169 .get_mapped_range()
170 .unwrap();
171 bytemuck::allocation::pod_collect_to_vec(×tamp_view)
172 };
173
174 self.destination_buffer.unmap();
175
176 timestamps
177 }
178}
179
180async fn run() {
181 let instance =
183 wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
184
185 let adapter = instance
187 .request_adapter(&wgpu::RequestAdapterOptions::default())
188 .await
189 .expect("Failed to request adapter.");
190
191 let features = adapter.features()
193 & (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
194 if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
195 println!("Adapter supports timestamp queries.");
196 } else {
197 println!("Adapter does not support timestamp queries, aborting.");
198 return;
199 }
200 let timestamps_inside_passes = features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES);
201 if timestamps_inside_passes {
202 println!("Adapter supports timestamp queries within passes.");
203 } else {
204 println!("Adapter does not support timestamp queries within passes.");
205 }
206
207 let (device, queue) = adapter
210 .request_device(&wgpu::DeviceDescriptor {
211 label: None,
212 required_features: features,
213 required_limits: wgpu::Limits::downlevel_defaults(),
214 experimental_features: wgpu::ExperimentalFeatures::disabled(),
215 memory_hints: wgpu::MemoryHints::MemoryUsage,
216 trace: wgpu::Trace::Off,
217 })
218 .await
219 .unwrap();
220
221 let queries = submit_render_and_compute_pass_with_queries(&device, &queue);
222 let raw_results = queries.wait_for_results(&device);
223 println!("Raw timestamp buffer contents: {raw_results:?}");
224 QueryResults::from_raw_results(raw_results, timestamps_inside_passes).print(&queue);
225}
226
227fn submit_render_and_compute_pass_with_queries(
228 device: &wgpu::Device,
229 queue: &wgpu::Queue,
230) -> Queries {
231 let mut encoder =
232 device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
233
234 let mut queries = Queries::new(device, QueryResults::NUM_QUERIES);
235 let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
236
237 if device
238 .features()
239 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
240 {
241 encoder.write_timestamp(&queries.set, queries.next_unused_query);
242 queries.next_unused_query += 1;
243 }
244
245 render_pass(
247 device,
248 &shader,
249 &mut encoder,
250 &queries.set,
251 &mut queries.next_unused_query,
252 );
253
254 compute_pass(
256 device,
257 &shader,
258 &mut encoder,
259 &queries.set,
260 &mut queries.next_unused_query,
261 );
262
263 if device
264 .features()
265 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)
266 {
267 encoder.write_timestamp(&queries.set, queries.next_unused_query);
268 queries.next_unused_query += 1;
269 }
270
271 queries.resolve(&mut encoder);
272 queue.submit(Some(encoder.finish()));
273
274 queries
275}
276
277fn compute_pass(
278 device: &wgpu::Device,
279 module: &wgpu::ShaderModule,
280 encoder: &mut wgpu::CommandEncoder,
281 query_set: &wgpu::QuerySet,
282 next_unused_query: &mut u32,
283) {
284 let storage_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
285 label: Some("Storage Buffer"),
286 contents: bytemuck::cast_slice(&[42]),
287 usage: wgpu::BufferUsages::STORAGE,
288 });
289 let compute_pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
290 label: None,
291 layout: None,
292 module,
293 entry_point: Some("main_cs"),
294 compilation_options: Default::default(),
295 cache: None,
296 });
297 let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
298 let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
299 label: None,
300 layout: &bind_group_layout,
301 entries: &[wgpu::BindGroupEntry {
302 binding: 0,
303 resource: storage_buffer.as_entire_binding(),
304 }],
305 });
306
307 let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
308 label: None,
309 timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
310 query_set,
311 beginning_of_pass_write_index: Some(*next_unused_query),
312 end_of_pass_write_index: Some(*next_unused_query + 1),
313 }),
314 });
315 *next_unused_query += 2;
316 cpass.set_pipeline(&compute_pipeline);
317 cpass.set_bind_group(0, &bind_group, &[]);
318 cpass.dispatch_workgroups(1, 1, 1);
319 if device
320 .features()
321 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
322 {
323 cpass.write_timestamp(query_set, *next_unused_query);
324 *next_unused_query += 1;
325 }
326 cpass.dispatch_workgroups(1, 1, 1);
327}
328
329fn render_pass(
330 device: &wgpu::Device,
331 module: &wgpu::ShaderModule,
332 encoder: &mut wgpu::CommandEncoder,
333 query_set: &wgpu::QuerySet,
334 next_unused_query: &mut u32,
335) {
336 let format = wgpu::TextureFormat::Rgba8Unorm;
337
338 let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
339 label: None,
340 bind_group_layouts: &[],
341 immediate_size: 0,
342 });
343
344 let render_pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
345 label: None,
346 layout: Some(&pipeline_layout),
347 vertex: wgpu::VertexState {
348 module,
349 entry_point: Some("vs_main"),
350 compilation_options: Default::default(),
351 buffers: &[],
352 },
353 fragment: Some(wgpu::FragmentState {
354 module,
355 entry_point: Some("fs_main"),
356 compilation_options: Default::default(),
357 targets: &[Some(format.into())],
358 }),
359 primitive: wgpu::PrimitiveState::default(),
360 depth_stencil: None,
361 multisample: wgpu::MultisampleState::default(),
362 multiview_mask: None,
363 cache: None,
364 });
365 let render_target = device.create_texture(&wgpu::TextureDescriptor {
366 label: Some("rendertarget"),
367 size: wgpu::Extent3d {
368 width: 512,
369 height: 512,
370 depth_or_array_layers: 1,
371 },
372 mip_level_count: 1,
373 sample_count: 1,
374 dimension: wgpu::TextureDimension::D2,
375 format,
376 usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
377 view_formats: &[format],
378 });
379 let render_target_view = render_target.create_view(&wgpu::TextureViewDescriptor::default());
380
381 let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
382 label: None,
383 color_attachments: &[Some(wgpu::RenderPassColorAttachment {
384 view: &render_target_view,
385 depth_slice: None,
386 resolve_target: None,
387 ops: wgpu::Operations {
388 load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
389 store: wgpu::StoreOp::Store,
390 },
391 })],
392 depth_stencil_attachment: None,
393 timestamp_writes: Some(wgpu::RenderPassTimestampWrites {
394 query_set,
395 beginning_of_pass_write_index: Some(*next_unused_query),
396 end_of_pass_write_index: Some(*next_unused_query + 1),
397 }),
398 occlusion_query_set: None,
399 multiview_mask: None,
400 });
401 *next_unused_query += 2;
402
403 rpass.set_pipeline(&render_pipeline);
404
405 rpass.draw(0..3, 0..1);
406 if device
407 .features()
408 .contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES)
409 {
410 rpass.write_timestamp(query_set, *next_unused_query);
411 *next_unused_query += 1;
412 }
413
414 rpass.draw(0..3, 0..1);
415}
416
417pub fn main() {
418 #[cfg(not(target_arch = "wasm32"))]
419 {
420 env_logger::init();
421 pollster::block_on(run());
422 }
423 #[cfg(target_arch = "wasm32")]
424 {
425 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
426 console_log::init().expect("could not initialize logger");
427 wasm_bindgen_futures::spawn_local(run());
428 }
429}
430
#[cfg(test)]
pub mod tests {
    use wgpu::Backends;
    use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration};

    use super::{submit_render_and_compute_pass_with_queries, QueryResults};

    /// Timestamps at pass boundaries only (`TIMESTAMP_QUERY`).
    #[gpu_test]
    pub static TIMESTAMPS_PASS_BOUNDARIES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(wgpu::Features::TIMESTAMP_QUERY),
        )
        .run_sync(|ctx| test_timestamps(ctx, false, false));

    /// Pass boundaries plus timestamps written on the command encoder.
    #[gpu_test]
    pub static TIMESTAMPS_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
                ),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, false));

    /// Everything above plus timestamps written inside the passes.
    ///
    /// Marked as a flaky expected failure on GL/llvmpipe, where the inner compute timestamp
    /// may be reported before the pass start timestamp.
    #[gpu_test]
    pub static TIMESTAMPS_PASSES: GpuTestConfiguration = GpuTestConfiguration::new()
        .parameters(
            wgpu_test::TestParameters::default()
                .limits(wgpu::Limits::downlevel_defaults())
                .features(
                    wgpu::Features::TIMESTAMP_QUERY
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS
                        | wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES,
                )
                .expect_fail(
                    FailureCase::backend_adapter(Backends::GL, "llvmpipe")
                        .panic("unexpected: inner timestamp before compute pass start timestamp")
                        .flaky(),
                ),
        )
        .run_sync(|ctx| test_timestamps(ctx, true, true));

    /// Runs the example workload on the test device and sanity-checks the decoded timestamps.
    ///
    /// The encoder checks only run when `timestamps_on_encoder` is set; the in-pass checks
    /// only run when the corresponding timestamp was decoded.
    fn test_timestamps(
        ctx: wgpu_test::TestingContext,
        timestamps_on_encoder: bool,
        timestamps_inside_passes: bool,
    ) {
        let queries = submit_render_and_compute_pass_with_queries(&ctx.device, &ctx.queue);
        let raw_results = queries.wait_for_results(&ctx.device);
        println!("Raw timestamp buffer contents: {raw_results:?}");
        let query_results = QueryResults::from_raw_results(raw_results, timestamps_inside_passes);
        query_results.print(&ctx.queue);

        let QueryResults {
            encoder_timestamps,
            render_start_end_timestamps,
            render_inside_timestamp,
            compute_start_end_timestamps,
            compute_inside_timestamp,
        } = query_results;

        // Wrapping subtraction so a wrapped-around tick counter cannot panic here.
        let ticks_between = |[start, end]: [u64; 2]| end.wrapping_sub(start);
        let render_delta = ticks_between(render_start_end_timestamps);
        let compute_delta = ticks_between(compute_start_end_timestamps);
        let encoder_delta = ticks_between(encoder_timestamps);

        if timestamps_on_encoder {
            assert!(encoder_delta > 0);
            assert!(encoder_delta >= render_delta + compute_delta);
        }

        if let Some(render_inside_timestamp) = render_inside_timestamp {
            assert!(render_inside_timestamp >= render_start_end_timestamps[0]);
            assert!(render_inside_timestamp <= render_start_end_timestamps[1]);
        }

        if let Some(compute_inside_timestamp) = compute_inside_timestamp {
            // This message is matched by the `expect_fail` case of `TIMESTAMPS_PASSES`.
            assert!(
                compute_inside_timestamp >= compute_start_end_timestamps[0],
                "unexpected: inner timestamp before compute pass start timestamp"
            );
            assert!(compute_inside_timestamp <= compute_start_end_timestamps[1]);
        }
    }
}