wgpu_examples/repeated_compute/mod.rs

//! See the hello-compute example's main.rs for more details,
//! as similar items here are not explained again.
//!
//! This example does, however, elaborate on some things that the
//! hello-compute example does not, such as mapping buffers
//! and why async channels are used.

use nanorand::Rng;

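// Sentinel value the shader is expected to write when its computation overflows;
// results matching it are printed as "(overflow)" below.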
const OVERFLOW: u32 = 0xffffffff;

async fn run() {
    let mut numbers = [0u32; 256];
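    // `size_of_val` gives the size of `numbers` in bytes (256 * 4 = 1024),
    // which the context uses for its buffer sizes.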
    let context = WgpuContext::new(size_of_val(&numbers)).await;

    let mut rand = nanorand::WyRand::new();

    for _ in 0..10 {
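        // Fill the input with random u16 values widened to u32, keeping each input below 2^16.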
        for p in numbers.iter_mut() {
            *p = rand.generate::<u16>() as u32;
        }

        compute(&mut numbers, &context).await;

        let printed_numbers = numbers
            .iter()
            .map(|n| match n {
                &OVERFLOW => "(overflow)".to_string(),
                n => n.to_string(),
            })
            .collect::<Vec<String>>();
        log::info!("Results: {printed_numbers:?}");
    }
}

async fn compute(local_buffer: &mut [u32], context: &WgpuContext) {
    log::info!("Beginning GPU compute on data {local_buffer:?}.");
    // Local buffer contents -> GPU storage buffer
    // Adds a write-buffer command to the queue. This command is more complicated
    // than it appears: the data is staged internally and the copy only executes
    // on the next call to `Queue::submit`.
    context.queue.write_buffer(
        &context.storage_buffer,
        0,
        bytemuck::cast_slice(local_buffer),
    );
    log::info!("Wrote to buffer.");

    let mut command_encoder = context
        .device
        .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

    {
        let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: None,
            timestamp_writes: None,
        });
        compute_pass.set_pipeline(&context.pipeline);
        compute_pass.set_bind_group(0, &context.bind_group, &[]);
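        // One workgroup per input element; shader.wgsl is expected to declare @workgroup_size(1).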
        compute_pass.dispatch_workgroups(local_buffer.len() as u32, 1, 1);
    }
    // We finish the compute pass by dropping it.

    // Entire storage buffer -> staging buffer.
    command_encoder.copy_buffer_to_buffer(
        &context.storage_buffer,
        0,
        &context.output_staging_buffer,
        0,
        context.storage_buffer.size(),
    );

    // Finalize the command encoder, add the contained commands to the queue and flush.
    context.queue.submit(Some(command_encoder.finish()));
    log::info!("Submitted commands.");

    // Finally, time to get our results.
    // First we get a buffer slice, which represents a chunk of the buffer (which we
    // can't access yet).
    // We want the whole thing, so we use an unbounded range.
    let buffer_slice = context.output_staging_buffer.slice(..);
    // Now things get complicated. WebGPU, for safety reasons, only allows either the GPU
    // or the CPU to access a buffer's contents at a time. We need to "map" the buffer,
    // which means flipping ownership of the buffer over to the CPU and making access legal.
    // We do this with `BufferSlice::map_async`.
    //
    // The problem is that `map_async` is not an async function, so we can't await it.
    // Instead, we pass in a closure that will be executed when the slice is
    // either mapped or the mapping has failed.
    //
    // This leaves us without a reliable way to wait in the main code for the buffer
    // to be mapped; even worse, calling `get_mapped_range` or `get_mapped_range_mut`
    // prematurely will cause a panic, not return an error.
    //
    // Using channels solves this, as awaiting the receiving of a message from
    // the passed closure will force the outside code to wait. It also doesn't hurt
    // if the closure finishes before the outside code catches up, as the message is
    // buffered and receiving will just pick that up.
    //
    // It may also be worth noting that although async channels are wholly unnecessary
    // on native, std channels don't work on WASM, so for the sake of portability
    // we use async channels that work on both native and WASM.
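    // A bound of 1 is enough here: the `map_async` callback sends exactly one message.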
    let (sender, receiver) = flume::bounded(1);
    buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
    // In order for the mapping to be completed, one of three things must happen.
    // One of them is calling `Device::poll`. This isn't necessary on the web, as devices
    // are polled automatically, but natively we need to make sure it happens manually.
    // `PollType::wait()` will cause the thread to wait on native, but not on WebGPU.
    context.device.poll(wgpu::PollType::wait()).unwrap();
    log::info!("Device polled.");
    // Now we await the receiving and panic if anything went wrong because we're lazy.
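    // The outer `unwrap` covers a channel receive error; the inner one, a failed mapping.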
    receiver.recv_async().await.unwrap().unwrap();
    log::info!("Result received.");
    // NOW we can call get_mapped_range.
    {
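        // `get_mapped_range` returns a `BufferView`, which dereferences to `&[u8]`;
        // cast it back to u32s before copying.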
        let view = buffer_slice.get_mapped_range();
        local_buffer.copy_from_slice(bytemuck::cast_slice(&view));
    }
    log::info!("Results written to local buffer.");
    // We need to make sure all `BufferView`s are dropped before we
    // unmap the buffer below.
    // Unmap so that we can copy into the staging buffer again in the next iteration.
    context.output_staging_buffer.unmap();
}

pub fn main() {
    #[cfg(not(target_arch = "wasm32"))]
    {
        env_logger::builder()
            .filter_level(log::LevelFilter::Info)
            .format_timestamp_nanos()
            .init();
        pollster::block_on(run());
    }
    #[cfg(target_arch = "wasm32")]
    {
        std::panic::set_hook(Box::new(console_error_panic_hook::hook));
        console_log::init_with_level(log::Level::Info).expect("could not initialize logger");

        crate::utils::add_web_nothing_to_see_msg();

        wasm_bindgen_futures::spawn_local(run());
    }
}

/// A convenient way to hold all the useful wgpu stuff together.
struct WgpuContext {
    device: wgpu::Device,
    queue: wgpu::Queue,
    pipeline: wgpu::ComputePipeline,
    bind_group: wgpu::BindGroup,
    storage_buffer: wgpu::Buffer,
    output_staging_buffer: wgpu::Buffer,
}

impl WgpuContext {
    async fn new(buffer_size: usize) -> WgpuContext {
        let instance = wgpu::Instance::default();
        let adapter = instance
            .request_adapter(&wgpu::RequestAdapterOptions::default())
            .await
            .unwrap();
        let (device, queue) = adapter
            .request_device(&wgpu::DeviceDescriptor {
                label: None,
                required_features: wgpu::Features::empty(),
                required_limits: wgpu::Limits::downlevel_defaults(),
                experimental_features: wgpu::ExperimentalFeatures::disabled(),
                memory_hints: wgpu::MemoryHints::Performance,
                trace: wgpu::Trace::Off,
            })
            .await
            .unwrap();

        // Our shader, kindly compiled with Naga.
        let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));

        // This is where the GPU will read from and write to.
        let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: None,
            size: buffer_size as wgpu::BufferAddress,
            usage: wgpu::BufferUsages::STORAGE
                | wgpu::BufferUsages::COPY_DST
                | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });
        // For portability reasons, WebGPU draws a distinction between memory that is
        // accessible by the CPU and memory that is accessible by the GPU. Only
        // buffers accessible by the CPU can be mapped and accessed by the CPU, and
        // only buffers visible to the GPU can be used in shaders. In order to get
        // data back from the GPU, we need to use `CommandEncoder::copy_buffer_to_buffer`
        // (as we do later) to copy the buffer modified by the GPU into a
        // mappable, CPU-accessible buffer, which we create here.
        let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
            label: None,
            size: buffer_size as wgpu::BufferAddress,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });

        // This can be thought of as the function signature for our CPU-GPU function.
        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
            label: None,
            entries: &[wgpu::BindGroupLayoutEntry {
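                // This slot is expected to correspond to `@group(0) @binding(0)` in shader.wgsl.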
                binding: 0,
                visibility: wgpu::ShaderStages::COMPUTE,
                ty: wgpu::BindingType::Buffer {
                    ty: wgpu::BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    // Going to have this be None just to be safe.
                    min_binding_size: None,
                },
                count: None,
            }],
        });
        // This ties actual resources stored in the GPU to our metaphorical function
        // through the binding slots we defined above.
        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: None,
            layout: &bind_group_layout,
            entries: &[wgpu::BindGroupEntry {
                binding: 0,
                resource: storage_buffer.as_entire_binding(),
            }],
        });

        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
            label: None,
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });
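        // `entry_point` below selects the `@compute` entry function (here named "main") in shader.wgsl.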
        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
            label: None,
            layout: Some(&pipeline_layout),
            module: &shader,
            entry_point: Some("main"),
            compilation_options: Default::default(),
            cache: None,
        });

        WgpuContext {
            device,
            queue,
            pipeline,
            bind_group,
            storage_buffer,
            output_staging_buffer,
        }
    }
}