1//! See hello-compute example main.rs for more details
2//! as similar items here are not explained.
3//!
//! It does, however, elaborate on some things that the hello-compute
//! example does not, such as mapping buffers and why async channels
//! are used.
78use nanorand::Rng;
910const OVERFLOW: u32 = 0xffffffff;
1112async fn run() {
13let mut numbers = [0u32; 256];
14let context = WgpuContext::new(size_of_val(&numbers)).await;
1516for _ in 0..10 {
17for p in numbers.iter_mut() {
18*p = nanorand::tls_rng().generate::<u16>() as u32;
19 }
2021 compute(&mut numbers, &context).await;
2223let printed_numbers = numbers
24 .iter()
25 .map(|n| match n {
26&OVERFLOW => "(overflow)".to_string(),
27 n => n.to_string(),
28 })
29 .collect::<Vec<String>>();
30log::info!("Results: {printed_numbers:?}");
31 }
32}
3334async fn compute(local_buffer: &mut [u32], context: &WgpuContext) {
35log::info!("Beginning GPU compute on data {local_buffer:?}.");
36// Local buffer contents -> GPU storage buffer
37 // Adds a write buffer command to the queue. This command is more complicated
38 // than it appears.
39context.queue.write_buffer(
40&context.storage_buffer,
410,
42 bytemuck::cast_slice(local_buffer),
43 );
44log::info!("Wrote to buffer.");
4546let mut command_encoder = context
47 .device
48 .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
4950 {
51let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
52 label: None,
53 timestamp_writes: None,
54 });
55 compute_pass.set_pipeline(&context.pipeline);
56 compute_pass.set_bind_group(0, &context.bind_group, &[]);
57 compute_pass.dispatch_workgroups(local_buffer.len() as u32, 1, 1);
58 }
59// We finish the compute pass by dropping it.
6061 // Entire storage buffer -> staging buffer.
62command_encoder.copy_buffer_to_buffer(
63&context.storage_buffer,
640,
65&context.output_staging_buffer,
660,
67 context.storage_buffer.size(),
68 );
6970// Finalize the command encoder, add the contained commands to the queue and flush.
71context.queue.submit(Some(command_encoder.finish()));
72log::info!("Submitted commands.");
7374// Finally time to get our results.
75 // First we get a buffer slice which represents a chunk of the buffer (which we
76 // can't access yet).
77 // We want the whole thing so use unbounded range.
78let buffer_slice = context.output_staging_buffer.slice(..);
79// Now things get complicated. WebGPU, for safety reasons, only allows either the GPU
80 // or CPU to access a buffer's contents at a time. We need to "map" the buffer which means
81 // flipping ownership of the buffer over to the CPU and making access legal. We do this
82 // with `BufferSlice::map_async`.
83 //
84 // The problem is that map_async is not an async function so we can't await it. What
85 // we need to do instead is pass in a closure that will be executed when the slice is
86 // either mapped or the mapping has failed.
87 //
88 // The problem with this is that we don't have a reliable way to wait in the main
89 // code for the buffer to be mapped and even worse, calling get_mapped_range or
90 // get_mapped_range_mut prematurely will cause a panic, not return an error.
91 //
92 // Using channels solves this as awaiting the receiving of a message from
93 // the passed closure will force the outside code to wait. It also doesn't hurt
94 // if the closure finishes before the outside code catches up as the message is
95 // buffered and receiving will just pick that up.
96 //
97 // It may also be worth noting that although on native, the usage of asynchronous
98 // channels is wholly unnecessary, for the sake of portability to WASM (std channels
99 // don't work on WASM,) we'll use async channels that work on both native and WASM.
100let (sender, receiver) = flume::bounded(1);
101 buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
102// In order for the mapping to be completed, one of three things must happen.
103 // One of those can be calling `Device::poll`. This isn't necessary on the web as devices
104 // are polled automatically but natively, we need to make sure this happens manually.
105 // `PollType::Wait` will cause the thread to wait on native but not on WebGpu.
106context.device.poll(wgpu::PollType::wait()).unwrap();
107log::info!("Device polled.");
108// Now we await the receiving and panic if anything went wrong because we're lazy.
109receiver.recv_async().await.unwrap().unwrap();
110log::info!("Result received.");
111// NOW we can call get_mapped_range.
112{
113let view = buffer_slice.get_mapped_range();
114 local_buffer.copy_from_slice(bytemuck::cast_slice(&view));
115 }
116log::info!("Results written to local buffer.");
117// We need to make sure all `BufferView`'s are dropped before we do what we're about
118 // to do.
119 // Unmap so that we can copy to the staging buffer in the next iteration.
120context.output_staging_buffer.unmap();
121}
122123pub fn main() {
124#[cfg(not(target_arch = "wasm32"))]
125{
126 env_logger::builder()
127 .filter_level(log::LevelFilter::Info)
128 .format_timestamp_nanos()
129 .init();
130 pollster::block_on(run());
131 }
132#[cfg(target_arch = "wasm32")]
133{
134 std::panic::set_hook(Box::new(console_error_panic_hook::hook));
135 console_log::init_with_level(log::Level::Info).expect("could not initialize logger");
136137crate::utils::add_web_nothing_to_see_msg();
138139 wasm_bindgen_futures::spawn_local(run());
140 }
141}
/// Holds all of the wgpu resources the example needs in one place.
struct WgpuContext {
    // Logical GPU handle used to create resources and to poll for work.
    device: wgpu::Device,
    // Command queue the example writes buffers to and submits work on.
    queue: wgpu::Queue,
    // Compute pipeline built from shader.wgsl.
    pipeline: wgpu::ComputePipeline,
    // Binds the storage buffer to the shader's binding 0.
    bind_group: wgpu::BindGroup,
    // GPU-side buffer the shader reads from and writes to.
    storage_buffer: wgpu::Buffer,
    // CPU-mappable buffer results are copied into for readback.
    output_staging_buffer: wgpu::Buffer,
}
impl WgpuContext {
    /// Builds the full GPU context: device, queue, compute pipeline, and the
    /// two buffers (GPU-side storage plus CPU-mappable staging), each sized
    /// to hold `buffer_size` bytes.
    async fn new(buffer_size: usize) -> WgpuContext {
        let instance = wgpu::Instance::default();
        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions::default())
            .await
            .unwrap();
        let (device, queue) = adapter
            .request_device(&wgpu::DeviceDescriptor {
                label: None,
                required_features: wgpu::Features::empty(),
                required_limits: wgpu::Limits::downlevel_defaults(),
                memory_hints: wgpu::MemoryHints::Performance,
                trace: wgpu::Trace::Off,
            })
            .await
            .unwrap();

        // Our shader, kindly compiled with Naga.
        let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));

        // This is where the GPU will read from and write to.
        let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: None,
            size: buffer_size as wgpu::BufferAddress,
            usage: wgpu::BufferUsages::STORAGE
                | wgpu::BufferUsages::COPY_DST
                | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });
        // For portability reasons, WebGPU draws a distinction between memory that is
        // accessible by the CPU and memory that is accessible by the GPU. Only
        // buffers accessible by the CPU can be mapped and accessed by the CPU and
        // only buffers visible to the GPU can be used in shaders. In order to get
        // data from the GPU, we need to use CommandEncoder::copy_buffer_to_buffer
        // (which we will later) to copy the buffer modified by the GPU into a
        // mappable, CPU-accessible buffer which we'll create here.
        let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: None,
            size: buffer_size as wgpu::BufferAddress,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });

        // This can be thought of as the function signature for our CPU-GPU
        // function: one read-write storage buffer at binding 0, visible only
        // to the compute stage.
        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: None,
            entries: &[wgpu::BindGroupLayoutEntry {
                binding: 0,
                visibility: wgpu::ShaderStages::COMPUTE,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    // Going to have this be None just to be safe.
                    min_binding_size: None,
                },
                count: None,
            }],
        });
        // This ties actual resources stored in the GPU to our metaphorical function
        // through the binding slots we defined above.
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: None,
            layout: &bind_group_layout,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
                resource: storage_buffer.as_entire_binding(),
            }],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: None,
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });
        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: None,
            layout: Some(&pipeline_layout),
            module: &shader,
            // Must match the entry-point function name in shader.wgsl.
            entry_point: Some("main"),
            compilation_options: Default::default(),
            cache: None,
        });

        WgpuContext {
            device,
            queue,
            pipeline,
            bind_group,
            storage_buffer,
            output_staging_buffer,
        }
    }
}