wgpu_examples/repeated_compute/mod.rs

//! See the hello-compute example's main.rs for more details,
//! as similar items here are not explained.
//!
//! This example does, however, elaborate on some things that the
//! hello-compute example does not, such as mapping buffers
//! and why we use async channels.

use nanorand::Rng;
9
/// Sentinel value treated as "overflowed" when printing results in `run`
/// (presumably what the shader writes on overflow — same as `u32::MAX`).
const OVERFLOW: u32 = u32::MAX;
11
12async fn run() {
13    let mut numbers = [0u32; 256];
14    let context = WgpuContext::new(size_of_val(&numbers)).await;
15
16    let mut rand = nanorand::WyRand::new();
17
18    for _ in 0..10 {
19        for p in numbers.iter_mut() {
20            *p = rand.generate::<u16>() as u32;
21        }
22
23        compute(&mut numbers, &context).await;
24
25        let printed_numbers = numbers
26            .iter()
27            .map(|n| match n {
28                &OVERFLOW => "(overflow)".to_string(),
29                n => n.to_string(),
30            })
31            .collect::<Vec<String>>();
32        log::info!("Results: {printed_numbers:?}");
33    }
34}
35
36async fn compute(local_buffer: &mut [u32], context: &WgpuContext) {
37    log::info!("Beginning GPU compute on data {local_buffer:?}.");
38    // Local buffer contents -> GPU storage buffer
39    // Adds a write buffer command to the queue. This command is more complicated
40    // than it appears.
41    context.queue.write_buffer(
42        &context.storage_buffer,
43        0,
44        bytemuck::cast_slice(local_buffer),
45    );
46    log::info!("Wrote to buffer.");
47
48    let mut command_encoder = context
49        .device
50        .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
51
52    {
53        let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
54            label: None,
55            timestamp_writes: None,
56        });
57        compute_pass.set_pipeline(&context.pipeline);
58        compute_pass.set_bind_group(0, &context.bind_group, &[]);
59        compute_pass.dispatch_workgroups(local_buffer.len() as u32, 1, 1);
60    }
61    // We finish the compute pass by dropping it.
62
63    // Entire storage buffer -> staging buffer.
64    command_encoder.copy_buffer_to_buffer(
65        &context.storage_buffer,
66        0,
67        &context.output_staging_buffer,
68        0,
69        context.storage_buffer.size(),
70    );
71
72    // Finalize the command encoder, add the contained commands to the queue and flush.
73    context.queue.submit(Some(command_encoder.finish()));
74    log::info!("Submitted commands.");
75
76    // Finally time to get our results.
77    // First we get a buffer slice which represents a chunk of the buffer (which we
78    // can't access yet).
79    // We want the whole thing so use unbounded range.
80    let buffer_slice = context.output_staging_buffer.slice(..);
81    // Now things get complicated. WebGPU, for safety reasons, only allows either the GPU
82    // or CPU to access a buffer's contents at a time. We need to "map" the buffer which means
83    // flipping ownership of the buffer over to the CPU and making access legal. We do this
84    // with `BufferSlice::map_async`.
85    //
86    // The problem is that map_async is not an async function so we can't await it. What
87    // we need to do instead is pass in a closure that will be executed when the slice is
88    // either mapped or the mapping has failed.
89    //
90    // The problem with this is that we don't have a reliable way to wait in the main
91    // code for the buffer to be mapped and even worse, calling get_mapped_range or
92    // get_mapped_range_mut prematurely will cause a panic, not return an error.
93    //
94    // Using channels solves this as awaiting the receiving of a message from
95    // the passed closure will force the outside code to wait. It also doesn't hurt
96    // if the closure finishes before the outside code catches up as the message is
97    // buffered and receiving will just pick that up.
98    //
99    // It may also be worth noting that although on native, the usage of asynchronous
100    // channels is wholly unnecessary, for the sake of portability to WASM (std channels
101    // don't work on WASM,) we'll use async channels that work on both native and WASM.
102    let (sender, receiver) = flume::bounded(1);
103    buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
104    // In order for the mapping to be completed, one of three things must happen.
105    // One of those can be calling `Device::poll`. This isn't necessary on the web as devices
106    // are polled automatically but natively, we need to make sure this happens manually.
107    // `PollType::Wait` will cause the thread to wait on native but not on WebGpu.
108    context
109        .device
110        .poll(wgpu::PollType::wait_indefinitely())
111        .unwrap();
112    log::info!("Device polled.");
113    // Now we await the receiving and panic if anything went wrong because we're lazy.
114    receiver.recv_async().await.unwrap().unwrap();
115    log::info!("Result received.");
116    // NOW we can call get_mapped_range.
117    {
118        let view = buffer_slice.get_mapped_range().unwrap();
119        let data: Vec<u32> = bytemuck::allocation::pod_collect_to_vec(&view);
120        local_buffer.copy_from_slice(&data);
121    }
122    log::info!("Results written to local buffer.");
123    // We need to make sure all `BufferView`'s are dropped before we do what we're about
124    // to do.
125    // Unmap so that we can copy to the staging buffer in the next iteration.
126    context.output_staging_buffer.unmap();
127}
128
/// Binary entry point: sets up logging for the platform, then runs `run`.
pub fn main() {
    #[cfg(not(target_arch = "wasm32"))]
    {
        // Native: install a timestamped env_logger, then drive the async
        // entry point to completion on this thread with a minimal executor.
        env_logger::builder()
            .filter_level(log::LevelFilter::Info)
            .format_timestamp_nanos()
            .init();
        pollster::block_on(run());
    }
    #[cfg(target_arch = "wasm32")]
    {
        // Web: forward panics and `log` output to the browser console.
        std::panic::set_hook(Box::new(console_error_panic_hook::hook));
        console_log::init_with_level(log::Level::Info).expect("could not initialize logger");

        crate::utils::add_web_nothing_to_see_msg();

        // Browsers have no blocking executor; hand the future to the
        // browser's event loop instead.
        wasm_bindgen_futures::spawn_local(run());
    }
}
148
/// Holds all the wgpu state this example needs in one place.
struct WgpuContext {
    device: wgpu::Device,
    queue: wgpu::Queue,
    pipeline: wgpu::ComputePipeline,
    bind_group: wgpu::BindGroup,
    // GPU-side working buffer the shader binds (STORAGE | COPY_DST | COPY_SRC).
    storage_buffer: wgpu::Buffer,
    // CPU-mappable buffer results are copied into for readback (COPY_DST | MAP_READ).
    output_staging_buffer: wgpu::Buffer,
}
158
159impl WgpuContext {
160    async fn new(buffer_size: usize) -> WgpuContext {
161        let instance = wgpu::Instance::default();
162        let adapter = instance
163            .request_adapter(&wgpu::RequestAdapterOptions::default())
164            .await
165            .unwrap();
166        let (device, queue) = adapter
167            .request_device(&wgpu::DeviceDescriptor {
168                label: None,
169                required_features: wgpu::Features::empty(),
170                required_limits: wgpu::Limits::downlevel_defaults(),
171                experimental_features: wgpu::ExperimentalFeatures::disabled(),
172                memory_hints: wgpu::MemoryHints::Performance,
173                trace: wgpu::Trace::Off,
174            })
175            .await
176            .unwrap();
177
178        // Our shader, kindly compiled with Naga.
179        let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
180
181        // This is where the GPU will read from and write to.
182        let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor {
183            label: None,
184            size: buffer_size as wgpu::BufferAddress,
185            usage: wgpu::BufferUsages::STORAGE
186                | wgpu::BufferUsages::COPY_DST
187                | wgpu::BufferUsages::COPY_SRC,
188            mapped_at_creation: false,
189        });
190        // For portability reasons, WebGPU draws a distinction between memory that is
191        // accessible by the CPU and memory that is accessible by the GPU. Only
192        // buffers accessible by the CPU can be mapped and accessed by the CPU and
193        // only buffers visible to the GPU can be used in shaders. In order to get
194        // data from the GPU, we need to use CommandEncoder::copy_buffer_to_buffer
195        // (which we will later) to copy the buffer modified by the GPU into a
196        // mappable, CPU-accessible buffer which we'll create here.
197        let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
198            label: None,
199            size: buffer_size as wgpu::BufferAddress,
200            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
201            mapped_at_creation: false,
202        });
203
204        // This can be though of as the function signature for our CPU-GPU function.
205        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
206            label: None,
207            entries: &[wgpu::BindGroupLayoutEntry {
208                binding: 0,
209                visibility: wgpu::ShaderStages::COMPUTE,
210                ty: wgpu::BindingType::Buffer {
211                    ty: wgpu::BufferBindingType::Storage { read_only: false },
212                    has_dynamic_offset: false,
213                    // Going to have this be None just to be safe.
214                    min_binding_size: None,
215                },
216                count: None,
217            }],
218        });
219        // This ties actual resources stored in the GPU to our metaphorical function
220        // through the binding slots we defined above.
221        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
222            label: None,
223            layout: &bind_group_layout,
224            entries: &[wgpu::BindGroupEntry {
225                binding: 0,
226                resource: storage_buffer.as_entire_binding(),
227            }],
228        });
229
230        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
231            label: None,
232            bind_group_layouts: &[Some(&bind_group_layout)],
233            immediate_size: 0,
234        });
235        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
236            label: None,
237            layout: Some(&pipeline_layout),
238            module: &shader,
239            entry_point: Some("main"),
240            compilation_options: Default::default(),
241            cache: None,
242        });
243
244        WgpuContext {
245            device,
246            queue,
247            pipeline,
248            bind_group,
249            storage_buffer,
250            output_staging_buffer,
251        }
252    }
253}