wgpu_examples/repeated_compute/
mod.rs

//! See the hello-compute example's main.rs for more details,
//! as items already explained there are not explained again here.
//!
//! This example does, however, elaborate on some things that the
//! hello-compute example does not, such as mapping buffers
//! and why async channels are used.
7
8use nanorand::Rng;
9
/// Sentinel result value: any result equal to this is printed as `"(overflow)"`.
///
/// NOTE(review): presumably the shader stores this value when its computation
/// overflows; confirm against `shader.wgsl`.
const OVERFLOW: u32 = u32::MAX;
11
12async fn run() {
13    let mut numbers = [0u32; 256];
14    let context = WgpuContext::new(size_of_val(&numbers)).await;
15
16    for _ in 0..10 {
17        for p in numbers.iter_mut() {
18            *p = nanorand::tls_rng().generate::<u16>() as u32;
19        }
20
21        compute(&mut numbers, &context).await;
22
23        let printed_numbers = numbers
24            .iter()
25            .map(|n| match n {
26                &OVERFLOW => "(overflow)".to_string(),
27                n => n.to_string(),
28            })
29            .collect::<Vec<String>>();
30        log::info!("Results: {printed_numbers:?}");
31    }
32}
33
34async fn compute(local_buffer: &mut [u32], context: &WgpuContext) {
35    log::info!("Beginning GPU compute on data {local_buffer:?}.");
36    // Local buffer contents -> GPU storage buffer
37    // Adds a write buffer command to the queue. This command is more complicated
38    // than it appears.
39    context.queue.write_buffer(
40        &context.storage_buffer,
41        0,
42        bytemuck::cast_slice(local_buffer),
43    );
44    log::info!("Wrote to buffer.");
45
46    let mut command_encoder = context
47        .device
48        .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
49
50    {
51        let mut compute_pass = command_encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
52            label: None,
53            timestamp_writes: None,
54        });
55        compute_pass.set_pipeline(&context.pipeline);
56        compute_pass.set_bind_group(0, &context.bind_group, &[]);
57        compute_pass.dispatch_workgroups(local_buffer.len() as u32, 1, 1);
58    }
59    // We finish the compute pass by dropping it.
60
61    // Entire storage buffer -> staging buffer.
62    command_encoder.copy_buffer_to_buffer(
63        &context.storage_buffer,
64        0,
65        &context.output_staging_buffer,
66        0,
67        context.storage_buffer.size(),
68    );
69
70    // Finalize the command encoder, add the contained commands to the queue and flush.
71    context.queue.submit(Some(command_encoder.finish()));
72    log::info!("Submitted commands.");
73
74    // Finally time to get our results.
75    // First we get a buffer slice which represents a chunk of the buffer (which we
76    // can't access yet).
77    // We want the whole thing so use unbounded range.
78    let buffer_slice = context.output_staging_buffer.slice(..);
79    // Now things get complicated. WebGPU, for safety reasons, only allows either the GPU
80    // or CPU to access a buffer's contents at a time. We need to "map" the buffer which means
81    // flipping ownership of the buffer over to the CPU and making access legal. We do this
82    // with `BufferSlice::map_async`.
83    //
84    // The problem is that map_async is not an async function so we can't await it. What
85    // we need to do instead is pass in a closure that will be executed when the slice is
86    // either mapped or the mapping has failed.
87    //
88    // The problem with this is that we don't have a reliable way to wait in the main
89    // code for the buffer to be mapped and even worse, calling get_mapped_range or
90    // get_mapped_range_mut prematurely will cause a panic, not return an error.
91    //
92    // Using channels solves this as awaiting the receiving of a message from
93    // the passed closure will force the outside code to wait. It also doesn't hurt
94    // if the closure finishes before the outside code catches up as the message is
95    // buffered and receiving will just pick that up.
96    //
97    // It may also be worth noting that although on native, the usage of asynchronous
98    // channels is wholly unnecessary, for the sake of portability to WASM (std channels
99    // don't work on WASM,) we'll use async channels that work on both native and WASM.
100    let (sender, receiver) = flume::bounded(1);
101    buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
102    // In order for the mapping to be completed, one of three things must happen.
103    // One of those can be calling `Device::poll`. This isn't necessary on the web as devices
104    // are polled automatically but natively, we need to make sure this happens manually.
105    // `PollType::Wait` will cause the thread to wait on native but not on WebGpu.
106    context.device.poll(wgpu::PollType::wait()).unwrap();
107    log::info!("Device polled.");
108    // Now we await the receiving and panic if anything went wrong because we're lazy.
109    receiver.recv_async().await.unwrap().unwrap();
110    log::info!("Result received.");
111    // NOW we can call get_mapped_range.
112    {
113        let view = buffer_slice.get_mapped_range();
114        local_buffer.copy_from_slice(bytemuck::cast_slice(&view));
115    }
116    log::info!("Results written to local buffer.");
117    // We need to make sure all `BufferView`'s are dropped before we do what we're about
118    // to do.
119    // Unmap so that we can copy to the staging buffer in the next iteration.
120    context.output_staging_buffer.unmap();
121}
122
123pub fn main() {
124    #[cfg(not(target_arch = "wasm32"))]
125    {
126        env_logger::builder()
127            .filter_level(log::LevelFilter::Info)
128            .format_timestamp_nanos()
129            .init();
130        pollster::block_on(run());
131    }
132    #[cfg(target_arch = "wasm32")]
133    {
134        std::panic::set_hook(Box::new(console_error_panic_hook::hook));
135        console_log::init_with_level(log::Level::Info).expect("could not initialize logger");
136
137        crate::utils::add_web_nothing_to_see_msg();
138
139        wasm_bindgen_futures::spawn_local(run());
140    }
141}
142
143/// A convenient way to hold together all the useful wgpu stuff together.
144struct WgpuContext {
145    device: wgpu::Device,
146    queue: wgpu::Queue,
147    pipeline: wgpu::ComputePipeline,
148    bind_group: wgpu::BindGroup,
149    storage_buffer: wgpu::Buffer,
150    output_staging_buffer: wgpu::Buffer,
151}
152
153impl WgpuContext {
154    async fn new(buffer_size: usize) -> WgpuContext {
155        let instance = wgpu::Instance::default();
156        let adapter = instance
157            .request_adapter(&wgpu::RequestAdapterOptions::default())
158            .await
159            .unwrap();
160        let (device, queue) = adapter
161            .request_device(&wgpu::DeviceDescriptor {
162                label: None,
163                required_features: wgpu::Features::empty(),
164                required_limits: wgpu::Limits::downlevel_defaults(),
165                memory_hints: wgpu::MemoryHints::Performance,
166                trace: wgpu::Trace::Off,
167            })
168            .await
169            .unwrap();
170
171        // Our shader, kindly compiled with Naga.
172        let shader = device.create_shader_module(wgpu::include_wgsl!("shader.wgsl"));
173
174        // This is where the GPU will read from and write to.
175        let storage_buffer = device.create_buffer(&wgpu::BufferDescriptor {
176            label: None,
177            size: buffer_size as wgpu::BufferAddress,
178            usage: wgpu::BufferUsages::STORAGE
179                | wgpu::BufferUsages::COPY_DST
180                | wgpu::BufferUsages::COPY_SRC,
181            mapped_at_creation: false,
182        });
183        // For portability reasons, WebGPU draws a distinction between memory that is
184        // accessible by the CPU and memory that is accessible by the GPU. Only
185        // buffers accessible by the CPU can be mapped and accessed by the CPU and
186        // only buffers visible to the GPU can be used in shaders. In order to get
187        // data from the GPU, we need to use CommandEncoder::copy_buffer_to_buffer
188        // (which we will later) to copy the buffer modified by the GPU into a
189        // mappable, CPU-accessible buffer which we'll create here.
190        let output_staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
191            label: None,
192            size: buffer_size as wgpu::BufferAddress,
193            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
194            mapped_at_creation: false,
195        });
196
197        // This can be though of as the function signature for our CPU-GPU function.
198        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
199            label: None,
200            entries: &[wgpu::BindGroupLayoutEntry {
201                binding: 0,
202                visibility: wgpu::ShaderStages::COMPUTE,
203                ty: wgpu::BindingType::Buffer {
204                    ty: wgpu::BufferBindingType::Storage { read_only: false },
205                    has_dynamic_offset: false,
206                    // Going to have this be None just to be safe.
207                    min_binding_size: None,
208                },
209                count: None,
210            }],
211        });
212        // This ties actual resources stored in the GPU to our metaphorical function
213        // through the binding slots we defined above.
214        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
215            label: None,
216            layout: &bind_group_layout,
217            entries: &[wgpu::BindGroupEntry {
218                binding: 0,
219                resource: storage_buffer.as_entire_binding(),
220            }],
221        });
222
223        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
224            label: None,
225            bind_group_layouts: &[&bind_group_layout],
226            push_constant_ranges: &[],
227        });
228        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
229            label: None,
230            layout: Some(&pipeline_layout),
231            module: &shader,
232            entry_point: Some("main"),
233            compilation_options: Default::default(),
234            cache: None,
235        });
236
237        WgpuContext {
238            device,
239            queue,
240            pipeline,
241            bind_group,
242            storage_buffer,
243            output_staging_buffer,
244        }
245    }
246}