naga/front/wgsl/parse/lexer.rs

1use super::{number::consume_number, Error, ExpectedToken, Result};
2use crate::front::wgsl::error::NumberError;
3use crate::front::wgsl::parse::directive::enable_extension::{
4    EnableExtensions, ImplementedEnableExtension,
5};
6use crate::front::wgsl::parse::Number;
7use crate::Span;
8
9use alloc::{boxed::Box, vec::Vec};
10
/// A [`Token`] paired with the [`Span`] of source text it occupies.
pub type TokenSpan<'a> = (Token<'a>, Span);

/// A lexical token of WGSL source text.
///
/// String-carrying variants borrow directly from the source text, so tokens
/// are cheap to copy and compare.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// A separator character: `:;,`, and `.` when not part of a numeric
    /// literal.
    Separator(char),

    /// A parenthesis-like character: `()[]{}`, and also `<>`.
    ///
    /// Note that `<>` representing template argument brackets are distinguished
    /// using WGSL's [template list discovery algorithm][tlda], and are returned
    /// as [`Token::TemplateArgsStart`] and [`Token::TemplateArgsEnd`]. That is,
    /// we use `Paren` for `<>` when they are *not* parens.
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    Paren(char),

    /// The attribute introduction character `@`.
    Attribute,

    /// A numeric literal, either integral or floating-point, including any
    /// type suffix.
    ///
    /// Carries an `Err` when the text was recognizably a numeric literal but
    /// could not be represented (e.g. out of range); see [`NumberError`].
    Number(core::result::Result<Number, NumberError>),

    /// An identifier, possibly a reserved word.
    Word(&'a str),

    /// A miscellaneous single-character operator, like an arithmetic unary or
    /// binary operator. This includes `=`, for assignment and initialization.
    Operation(char),

    /// Certain multi-character logical operators: `!=`, `==`, `&&`,
    /// `||`, `<=` and `>=`. The value gives the operator's first
    /// character.
    ///
    /// For `<` and `>` operators, see [`Token::Paren`].
    LogicalOperation(char),

    /// A shift operator: `>>` or `<<`.
    ShiftOperation(char),

    /// A compound assignment operator like `+=`.
    ///
    /// When the given character is `<` or `>`, those represent the left shift
    /// and right shift assignment operators, `<<=` and `>>=`.
    AssignmentOperation(char),

    /// The `++` operator.
    IncrementOperation,

    /// The `--` operator.
    DecrementOperation,

    /// The `->` token.
    Arrow,

    /// A `<` representing the start of a template argument list, according to
    /// WGSL's [template list discovery algorithm][tlda].
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    TemplateArgsStart,

    /// A `>` representing the end of a template argument list, according to
    /// WGSL's [template list discovery algorithm][tlda].
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    TemplateArgsEnd,

    /// A character that does not represent a legal WGSL token.
    Unknown(char),

    /// Comment or whitespace.
    Trivia,

    /// A doc comment, beginning with `///` or `/**`.
    DocComment(&'a str),

    /// A module-level doc comment, beginning with `//!` or `/*!`.
    ModuleDocComment(&'a str),

    /// The end of the input.
    End,
}
95
/// Split `input` at the end of its longest leading run of characters
/// satisfying `what`, returning `(matching_prefix, remainder)`.
///
/// If every character matches, the remainder is empty; if the first character
/// does not match, the prefix is empty.
fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) {
    let split_point = input
        .char_indices()
        .find(|&(_, c)| !what(c))
        .map(|(index, _)| index)
        .unwrap_or(input.len());
    input.split_at(split_point)
}
100
/// A `<` token that might turn out to begin a template argument list.
///
/// `discover_template_lists` keeps a stack of these; when a matching `>` is
/// found, the candidate's token is rewritten to [`Token::TemplateArgsStart`].
struct UnclosedCandidate {
    // Index of the candidate `<` token in the caller's `tokens` buffer.
    index: usize,
    // The `()`/`[]` bracket nesting depth at which the `<` appeared; a `>`
    // can only match a candidate at the same depth.
    depth: usize,
}
105
/// Produce at least one token, distinguishing [template lists] from other uses
/// of `<` and `>`.
///
/// Consume one or more tokens from `input` and store them in `tokens`, updating
/// `input` to refer to the remaining text. Apply WGSL's [template list
/// discovery algorithm] to decide what sort of tokens `<` and `>` characters in
/// the input actually represent.
///
/// Store the tokens in `tokens` in the *reverse* of the order they appear in
/// the text, such that the caller can pop from the end of the vector to see the
/// tokens in textual order.
///
/// Each `tokens` entry pairs a token and its span with the input text
/// remaining after that token, so the caller can restore its position when
/// the token is consumed.
///
/// The `tokens` vector must be empty on entry. The idea is for the caller to
/// use it as a buffer of unconsumed tokens, and call this function to refill it
/// when it's empty.
///
/// The `source` argument must be the whole original source code, used to
/// compute spans.
///
/// If `ignore_doc_comments` is true, then doc comments are returned as
/// [`Token::Trivia`], like ordinary comments.
///
/// [template lists]: https://gpuweb.github.io/gpuweb/wgsl/#template-lists-sec
/// [template list discovery algorithm]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
fn discover_template_lists<'a>(
    tokens: &mut Vec<(TokenSpan<'a>, &'a str)>,
    source: &'a str,
    mut input: &'a str,
    ignore_doc_comments: bool,
) {
    assert!(tokens.is_empty());

    // True when the most recent significant token was an identifier, so a `<`
    // seen next could begin a template argument list. Trivia and doc comments
    // do not reset this flag.
    let mut looking_for_template_start = false;

    // Stack of `<` tokens that might begin template argument lists,
    // innermost (most recent) last.
    let mut pending: Vec<UnclosedCandidate> = Vec::new();

    // Current nesting depth of `()` and `[]` brackets. (`{}` brackets
    // exit all template list processing.)
    let mut depth = 0;

    // Discard candidates recorded at bracket depth `depth` or deeper: once
    // their enclosing bracket closes (or an operator like `&&` intervenes),
    // they can no longer be matched by a `>`.
    fn pop_until(pending: &mut Vec<UnclosedCandidate>, depth: usize) {
        while pending
            .last()
            .map(|candidate| candidate.depth >= depth)
            .unwrap_or(false)
        {
            pending.pop();
        }
    }

    loop {
        // Decide whether `consume_token` should treat a `>` character as
        // `TemplateArgsEnd`, without considering the characters that follow.
        //
        // This condition matches the one that determines whether the spec's
        // template list discovery algorithm looks past a `>` character for a
        // `=`. By passing this flag to `consume_token`, we ensure it follows
        // that behavior.
        let waiting_for_template_end = pending
            .last()
            .is_some_and(|candidate| candidate.depth == depth);

        // Ask `consume_token` for the next token and add it to `tokens`, along
        // with its span.
        //
        // This means that `<` enters the buffer as `Token::Paren('<')`, the
        // ordinary comparison operator. We'll change that to
        // `Token::TemplateArgsStart` later if appropriate.
        let (token, rest) = consume_token(input, waiting_for_template_end, ignore_doc_comments);
        let span = Span::from(source.len() - input.len()..source.len() - rest.len());
        tokens.push(((token, span), rest));
        input = rest;

        // Since `consume_token` treats `<<=`, `<<` and `<=` as operators, not
        // `Token::Paren`, that takes care of the WGSL algorithm's post-'<' lookahead
        // for us.
        match token {
            Token::Word(_) => {
                looking_for_template_start = true;
                continue;
            }
            Token::Trivia | Token::DocComment(_) | Token::ModuleDocComment(_)
                if looking_for_template_start =>
            {
                continue;
            }
            Token::Paren('<') if looking_for_template_start => {
                pending.push(UnclosedCandidate {
                    index: tokens.len() - 1,
                    depth,
                });
            }
            Token::TemplateArgsEnd => {
                // The `consume_token` function only returns `TemplateArgsEnd`
                // if `waiting_for_template_end` is true, so we know `pending`
                // has a top entry at the appropriate depth.
                //
                // Find the matching `<` token and change its type to
                // `TemplateArgsStart`.
                let candidate = pending.pop().unwrap();
                let &mut ((ref mut token, _), _) = tokens.get_mut(candidate.index).unwrap();
                *token = Token::TemplateArgsStart;
            }
            Token::Paren('(' | '[') => {
                depth += 1;
            }
            Token::Paren(')' | ']') => {
                // Candidates opened inside this bracket pair can no longer
                // be completed.
                pop_until(&mut pending, depth);
                depth = depth.saturating_sub(1);
            }
            Token::Operation('=') | Token::Separator(':' | ';') | Token::Paren('{') => {
                // These tokens cannot appear inside a template list, so every
                // pending candidate is a false alarm.
                pending.clear();
                depth = 0;
            }
            Token::LogicalOperation('&') | Token::LogicalOperation('|') => {
                // `&&` and `||` cannot appear (unparenthesized) inside a
                // template list at the current depth.
                pop_until(&mut pending, depth);
            }
            Token::End => break,
            _ => {}
        }

        looking_for_template_start = false;

        // The WGSL spec's template list discovery algorithm processes the
        // entire source at once, but Naga would rather limit its lookahead to
        // the actual text that could possibly be a template parameter list.
        // This is usually less than a line.
        if pending.is_empty() {
            break;
        }
    }

    // Reverse so the caller can pop tokens from the end in textual order.
    tokens.reverse();
}
239
/// Return the token at the start of `input`, along with the input remaining
/// after that token.
///
/// The `waiting_for_template_end` flag enables some special handling to help out
/// `discover_template_lists`:
///
/// - If `waiting_for_template_end` is `true`, then return text starting with
///   '>` as [`Token::TemplateArgsEnd`] and consume only the `>` character,
///   regardless of what characters follow it. This is required by the [template
///   list discovery algorithm][tlda] when the `>` would end a template argument list.
///
/// - If `waiting_for_template_end` is false, recognize multi-character tokens
///   beginning with `>` as usual.
///
/// If `ignore_doc_comments` is true, then doc comments are returned as
/// [`Token::Trivia`], like ordinary comments.
///
/// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
fn consume_token(
    input: &str,
    waiting_for_template_end: bool,
    ignore_doc_comments: bool,
) -> (Token<'_>, &str) {
    let mut chars = input.chars();
    let cur = match chars.next() {
        Some(c) => c,
        None => return (Token::End, ""),
    };
    match cur {
        ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()),
        '.' => {
            // A `.` followed by a digit begins a floating-point literal;
            // otherwise it is an ordinary separator.
            let og_chars = chars.as_str();
            match chars.next() {
                Some('0'..='9') => consume_number(input),
                _ => (Token::Separator(cur), og_chars),
            }
        }
        '@' => (Token::Attribute, chars.as_str()),
        '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()),
        '<' | '>' => {
            let og_chars = chars.as_str();
            if cur == '>' && waiting_for_template_end {
                // Consume only the `>` itself, even if `>=`, `>>` or `>>=`
                // would otherwise match; see the doc comment above.
                return (Token::TemplateArgsEnd, og_chars);
            }
            match chars.next() {
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                Some(c) if c == cur => {
                    // `<<` or `>>`: look one further for the `=` of `<<=`/`>>=`.
                    let og_chars = chars.as_str();
                    match chars.next() {
                        Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                        _ => (Token::ShiftOperation(cur), og_chars),
                    }
                }
                _ => (Token::Paren(cur), og_chars),
            }
        }
        '0'..='9' => consume_number(input),
        '/' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('/') => {
                    // Line comment: runs to the next line break, or to the
                    // end of the input.
                    let mut input_chars = input.char_indices();
                    let doc_comment_end = input_chars
                        .find_map(|(index, c)| is_comment_end(c).then_some(index))
                        .unwrap_or(input.len());
                    // The third character distinguishes `///` doc comments and
                    // `//!` module doc comments from plain `//` trivia. The
                    // comment text excludes the line break itself.
                    let token = match chars.next() {
                        Some('/') if !ignore_doc_comments => {
                            Token::DocComment(&input[..doc_comment_end])
                        }
                        Some('!') if !ignore_doc_comments => {
                            Token::ModuleDocComment(&input[..doc_comment_end])
                        }
                        _ => Token::Trivia,
                    };
                    // `input_chars` has advanced just past the line break (if
                    // any), so its remainder is the text after the comment.
                    (token, input_chars.as_str())
                }
                Some('*') => {
                    // Block comment, which may nest.
                    let next_c = chars.next();

                    enum CommentType {
                        Doc,
                        ModuleDoc,
                        Normal,
                    }
                    // `/**` introduces a doc comment and `/*!` a module doc
                    // comment, unless doc comments are being ignored.
                    let comment_type = match next_c {
                        Some('*') if !ignore_doc_comments => CommentType::Doc,
                        Some('!') if !ignore_doc_comments => CommentType::ModuleDoc,
                        _ => CommentType::Normal,
                    };

                    // Nesting depth of `/* */` pairs; we have already seen one `/*`.
                    let mut depth = 1;
                    let mut prev = next_c;

                    for c in &mut chars {
                        match (prev, c) {
                            (Some('*'), '/') => {
                                // Clear `prev` so a delimiter's characters
                                // can't be reused by an overlapping delimiter
                                // (e.g. the `*` of `*/` starting a new `*/`).
                                prev = None;
                                depth -= 1;
                                if depth == 0 {
                                    let rest = chars.as_str();
                                    let token = match comment_type {
                                        CommentType::Doc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::DocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::ModuleDoc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::ModuleDocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::Normal => Token::Trivia,
                                    };
                                    return (token, rest);
                                }
                            }
                            (Some('/'), '*') => {
                                prev = None;
                                depth += 1;
                            }
                            _ => {
                                prev = Some(c);
                            }
                        }
                    }

                    // Unterminated block comment: the rest of the input is
                    // treated as consumed.
                    (Token::End, "")
                }
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '-' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('>') => (Token::Arrow, chars.as_str()),
                Some('-') => (Token::DecrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '+' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('+') => (Token::IncrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '*' | '%' | '^' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '~' => (Token::Operation(cur), chars.as_str()),
        '=' | '!' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '&' | '|' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `&&` or `||`.
                Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        _ if is_blankspace(cur) => {
            // Collapse a maximal run of blankspace into one trivia token.
            let (_, rest) = consume_any(input, is_blankspace);
            (Token::Trivia, rest)
        }
        _ if is_word_start(cur) => {
            let (word, rest) = consume_any(input, is_word_part);
            (Token::Word(word), rest)
        }
        _ => (Token::Unknown(cur), chars.as_str()),
    }
}
420
/// Returns whether or not a char terminates a line comment: a line break
/// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F)
/// <https://www.w3.org/TR/WGSL/#line-break>
const fn is_comment_end(c: char) -> bool {
    matches!(
        c,
        '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
430
/// Returns whether or not a char is a blankspace (Unicode Pattern_White_Space)
const fn is_blankspace(c: char) -> bool {
    matches!(
        c,
        '\u{0020}'
            | '\u{0009}'..='\u{000d}'
            | '\u{0085}'
            | '\u{200e}'
            | '\u{200f}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
444
445/// Returns whether or not a char is a word start (Unicode XID_Start + '_')
446fn is_word_start(c: char) -> bool {
447    c == '_' || unicode_ident::is_xid_start(c)
448}
449
/// Returns whether or not a char is a word part (Unicode XID_Continue)
///
/// Together with [`is_word_start`], this determines which characters
/// `consume_token` groups into a [`Token::Word`].
fn is_word_part(c: char) -> bool {
    unicode_ident::is_xid_continue(c)
}
454
/// A WGSL tokenizer that resolves `<`/`>` ambiguity via buffered look-ahead.
pub(in crate::front::wgsl) struct Lexer<'a> {
    /// The remaining unconsumed input.
    input: &'a str,

    /// The full original source code.
    ///
    /// We compare `input` against this to compute the lexer's current offset in
    /// the source.
    pub(in crate::front::wgsl) source: &'a str,

    /// The byte offset of the end of the most recently returned non-trivia
    /// token.
    ///
    /// This is consulted by the `span_from` function, for finding the
    /// end of the span for larger structures like expressions or
    /// statements.
    last_end_offset: usize,

    /// A stack of unconsumed tokens to which template list discovery has been
    /// applied.
    ///
    /// This is a stack: the next token is at the *end* of the vector, not the
    /// start. So tokens appear here in the reverse of the order they appear in
    /// the source.
    ///
    /// Each entry pairs a token and its span with the input remaining after
    /// that token, so consuming an entry also tells us where `input` should
    /// point next.
    ///
    /// This doesn't contain the whole source, only those tokens produced by
    /// [`discover_template_lists`]'s look-ahead, or that have been produced by
    /// other look-ahead functions like `peek` and `next_if`. When this is empty,
    /// we call [`discover_template_lists`] to get more.
    tokens: Vec<(TokenSpan<'a>, &'a str)>,

    /// Whether or not to ignore doc comments.
    /// If `true`, doc comments are treated as [`Token::Trivia`].
    ignore_doc_comments: bool,

    /// The set of [enable-extensions] present in the module, determined in a pre-pass.
    ///
    /// [enable-extensions]: https://gpuweb.github.io/gpuweb/wgsl/#enable-extensions-sec
    pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
}
495
impl<'a> Lexer<'a> {
    /// Construct a lexer over `input`.
    ///
    /// If `ignore_doc_comments` is true, doc comments are returned as
    /// [`Token::Trivia`], like ordinary comments.
    pub(in crate::front::wgsl) const fn new(input: &'a str, ignore_doc_comments: bool) -> Self {
        Lexer {
            input,
            source: input,
            last_end_offset: 0,
            tokens: Vec::new(),
            enable_extensions: EnableExtensions::empty(),
            ignore_doc_comments,
        }
    }

    /// Check that `extension` is enabled in `self`.
    pub(in crate::front::wgsl) fn require_enable_extension(
        &self,
        extension: ImplementedEnableExtension,
        span: Span,
    ) -> Result<'static, ()> {
        if self.enable_extensions.contains(extension) {
            Ok(())
        } else {
            Err(Box::new(Error::EnableExtensionNotEnabled {
                kind: extension.into(),
                span,
            }))
        }
    }

    /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed
    ///
    /// # Examples
    /// ```ignore
    /// let lexer = Lexer::new("5");
    /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal);
    /// assert_eq!(value, 5);
    /// ```
    #[inline]
    pub fn capture_span<T, E>(
        &mut self,
        inner: impl FnOnce(&mut Self) -> core::result::Result<T, E>,
    ) -> core::result::Result<(T, Span), E> {
        let start = self.current_byte_offset();
        let res = inner(self)?;
        let end = self.current_byte_offset();
        Ok((res, Span::from(start..end)))
    }

    /// Skip any leading trivia and return the byte offset of the next
    /// significant token.
    ///
    /// NOTE(review): this advances `self.input` directly without consulting
    /// the `tokens` look-ahead buffer — presumably callers invoke it only
    /// when the buffer is empty, or when any buffered tokens are trivia;
    /// confirm against callers.
    pub(in crate::front::wgsl) fn start_byte_offset(&mut self) -> usize {
        loop {
            // Eat all trivia because `next` doesn't eat trailing trivia.
            let (token, rest) = consume_token(self.input, false, true);
            if let Token::Trivia = token {
                self.input = rest;
            } else {
                return self.current_byte_offset();
            }
        }
    }

    /// Collect all module doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_module_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::ModuleDocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                // Leave the non-doc token unconsumed for the next caller.
                return doc_comments;
            }
        }
    }

    /// Collect all doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::DocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                // Leave the non-doc token unconsumed for the next caller.
                return doc_comments;
            }
        }
    }

    /// The lexer's current byte offset within `source`.
    const fn current_byte_offset(&self) -> usize {
        self.source.len() - self.input.len()
    }

    /// Return a span running from `offset` to the end of the most recently
    /// returned non-trivia token.
    pub(in crate::front::wgsl) fn span_from(&self, offset: usize) -> Span {
        Span::from(offset..self.last_end_offset)
    }
    /// Return a span running from the start of `span` to the end of the most
    /// recently returned non-trivia token.
    pub(in crate::front::wgsl) fn span_with_start(&self, span: Span) -> Span {
        span.until(&Span::from(0..self.last_end_offset))
    }

    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are a parse state where bit shift operators may
    /// occur, but not angle brackets.
    #[must_use]
    pub(in crate::front::wgsl) fn next(&mut self) -> TokenSpan<'a> {
        self.next_impl(true)
    }

    /// Like [`Self::next`], but without degrading doc comments to trivia.
    #[cfg(test)]
    pub fn next_with_unignored_doc_comments(&mut self) -> TokenSpan<'a> {
        self.next_impl(false)
    }

    /// Return the next non-whitespace token from `self`, with a span.
    ///
    /// If `ignore_doc_comments` is true, doc comments are treated as trivia,
    /// in addition to whatever `self.ignore_doc_comments` requests.
    fn next_impl(&mut self, ignore_doc_comments: bool) -> TokenSpan<'a> {
        loop {
            // Refill the look-ahead buffer when it runs dry.
            if self.tokens.is_empty() {
                discover_template_lists(
                    &mut self.tokens,
                    self.source,
                    self.input,
                    ignore_doc_comments || self.ignore_doc_comments,
                );
            }
            assert!(!self.tokens.is_empty());
            let (token, rest) = self.tokens.pop().unwrap();

            // Advance past the token we just consumed; `rest` was recorded
            // by `discover_template_lists` as the text following it.
            self.input = rest;
            self.last_end_offset = self.current_byte_offset();

            match token.0 {
                Token::Trivia => {}
                _ => return token,
            }
        }
    }

    /// Return the next non-whitespace token without consuming it.
    #[must_use]
    pub(in crate::front::wgsl) fn peek(&mut self) -> TokenSpan<'a> {
        let input = self.input;
        let last_end_offset = self.last_end_offset;
        let token = self.next();
        // Push the token back onto the buffer, paired with the input that
        // follows it, then restore our position so a later `next` call can
        // consume it again.
        self.tokens.push((token, self.input));
        self.input = input;
        self.last_end_offset = last_end_offset;
        token
    }

    /// If the next token matches it's consumed and true is returned
    pub(in crate::front::wgsl) fn next_if(&mut self, what: Token<'_>) -> bool {
        let input = self.input;
        let last_end_offset = self.last_end_offset;
        let token = self.next();
        if token.0 == what {
            true
        } else {
            // No match: push the token back and restore our position, as
            // `peek` does.
            self.tokens.push((token, self.input));
            self.input = input;
            self.last_end_offset = last_end_offset;
            false
        }
    }

    /// Consume the next token, requiring it to equal `expected`, and return
    /// its span.
    pub(in crate::front::wgsl) fn expect_span(&mut self, expected: Token<'a>) -> Result<'a, Span> {
        let next = self.next();
        if next.0 == expected {
            Ok(next.1)
        } else {
            Err(Box::new(Error::Unexpected(
                next.1,
                ExpectedToken::Token(expected),
            )))
        }
    }

    /// Consume the next token, requiring it to equal `expected`.
    pub(in crate::front::wgsl) fn expect(&mut self, expected: Token<'a>) -> Result<'a, ()> {
        self.expect_span(expected)?;
        Ok(())
    }

    /// Consume the next token, requiring it to be a usable identifier, and
    /// return it with its span.
    ///
    /// `_` alone and identifiers beginning with `__` are rejected as errors.
    pub(in crate::front::wgsl) fn next_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
        match self.next() {
            (Token::Word("_"), span) => Err(Box::new(Error::InvalidIdentifierUnderscore(span))),
            (Token::Word(word), span) => {
                if word.starts_with("__") {
                    Err(Box::new(Error::ReservedIdentifierPrefix(span)))
                } else {
                    Ok((word, span))
                }
            }
            (_, span) => Err(Box::new(Error::Unexpected(span, ExpectedToken::Identifier))),
        }
    }

    /// Consume the next token, requiring it to be a usable, non-reserved
    /// identifier, and return it as an [`ast::Ident`](super::ast::Ident).
    pub(in crate::front::wgsl) fn next_ident(&mut self) -> Result<'a, super::ast::Ident<'a>> {
        self.next_ident_with_span()
            .and_then(|(word, span)| Self::word_as_ident(word, span))
            .map(|(name, span)| super::ast::Ident { name, span })
    }

    /// Return `word` unchanged, or an error if it is a WGSL reserved word.
    fn word_as_ident(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
        if crate::keywords::wgsl::RESERVED.contains(&word) {
            Err(Box::new(Error::ReservedKeyword(span)))
        } else {
            Ok((word, span))
        }
    }

    /// Consume the `(` that opens an argument list.
    pub(in crate::front::wgsl) fn open_arguments(&mut self) -> Result<'a, ()> {
        self.expect(Token::Paren('('))
    }

    /// After an argument, consume either `,` or the closing `)`.
    ///
    /// Return `Ok(true)` if more arguments follow, `Ok(false)` if the list is
    /// closed. A `,` immediately followed by `)` (a trailing comma) also
    /// closes the list.
    pub(in crate::front::wgsl) fn next_argument(&mut self) -> Result<'a, bool> {
        let paren = Token::Paren(')');
        if self.next_if(Token::Separator(',')) {
            Ok(!self.next_if(paren))
        } else {
            self.expect(paren).map(|()| false)
        }
    }
}
722
/// Check that lexing `source` with doc comments ignored yields
/// `expected_tokens`.
#[cfg(test)]
#[track_caller]
fn sub_test(source: &str, expected_tokens: &[Token]) {
    sub_test_with(true, source, expected_tokens);
}
728
/// Check that `source` lexes to `expected_tokens`, both with doc comments
/// preserved and with them ignored.
///
/// When doc comments are ignored they disappear from the token stream, so
/// the doc-comment tokens are filtered out of `expected_tokens` for the
/// second check.
#[cfg(test)]
#[track_caller]
fn sub_test_with_and_without_doc_comments(source: &str, expected_tokens: &[Token]) {
    sub_test_with(false, source, expected_tokens);

    let without_doc_tokens: Vec<Token> = expected_tokens
        .iter()
        .copied()
        .filter(|token| !matches!(token, Token::DocComment(_) | Token::ModuleDocComment(_)))
        .collect();
    sub_test_with(true, source, without_doc_tokens.as_slice());
}
744
/// Lex `source` and assert that the resulting tokens are exactly
/// `expected_tokens`, followed by [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test_with(ignore_doc_comments: bool, source: &str, expected_tokens: &[Token]) {
    let mut lexer = Lexer::new(source, ignore_doc_comments);
    for expected in expected_tokens.iter().copied() {
        assert_eq!(lexer.next_with_unignored_doc_comments().0, expected);
    }
    assert_eq!(lexer.next().0, Token::End);
}
754
755#[test]
756fn test_numbers() {
757    use half::f16;
758    // WGSL spec examples //
759
760    // decimal integer
761    sub_test(
762        "0x123 0X123u 1u 123 0 0i 0x3f",
763        &[
764            Token::Number(Ok(Number::AbstractInt(291))),
765            Token::Number(Ok(Number::U32(291))),
766            Token::Number(Ok(Number::U32(1))),
767            Token::Number(Ok(Number::AbstractInt(123))),
768            Token::Number(Ok(Number::AbstractInt(0))),
769            Token::Number(Ok(Number::I32(0))),
770            Token::Number(Ok(Number::AbstractInt(63))),
771        ],
772    );
773    // decimal floating point
774    sub_test(
775        "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h",
776        &[
777            Token::Number(Ok(Number::F32(0.))),
778            Token::Number(Ok(Number::AbstractFloat(1.))),
779            Token::Number(Ok(Number::AbstractFloat(0.01))),
780            Token::Number(Ok(Number::AbstractFloat(12.34))),
781            Token::Number(Ok(Number::F32(0.))),
782            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
783            Token::Number(Ok(Number::AbstractFloat(0.001))),
784            Token::Number(Ok(Number::AbstractFloat(43.75))),
785            Token::Number(Ok(Number::F32(16.))),
786            Token::Number(Ok(Number::AbstractFloat(0.1875))),
787            // https://github.com/gfx-rs/wgpu/issues/7046
788            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
789            Token::Number(Ok(Number::AbstractFloat(0.12109375))),
790            // https://github.com/gfx-rs/wgpu/issues/7046
791            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
792        ],
793    );
794
795    // MIN / MAX //
796
797    // min / max decimal integer
798    sub_test(
799        "0i 2147483647i 2147483648i",
800        &[
801            Token::Number(Ok(Number::I32(0))),
802            Token::Number(Ok(Number::I32(i32::MAX))),
803            Token::Number(Err(NumberError::NotRepresentable)),
804        ],
805    );
806    // min / max decimal unsigned integer
807    sub_test(
808        "0u 4294967295u 4294967296u",
809        &[
810            Token::Number(Ok(Number::U32(u32::MIN))),
811            Token::Number(Ok(Number::U32(u32::MAX))),
812            Token::Number(Err(NumberError::NotRepresentable)),
813        ],
814    );
815
816    // min / max hexadecimal signed integer
817    sub_test(
818        "0x0i 0x7FFFFFFFi 0x80000000i",
819        &[
820            Token::Number(Ok(Number::I32(0))),
821            Token::Number(Ok(Number::I32(i32::MAX))),
822            Token::Number(Err(NumberError::NotRepresentable)),
823        ],
824    );
825    // min / max hexadecimal unsigned integer
826    sub_test(
827        "0x0u 0xFFFFFFFFu 0x100000000u",
828        &[
829            Token::Number(Ok(Number::U32(u32::MIN))),
830            Token::Number(Ok(Number::U32(u32::MAX))),
831            Token::Number(Err(NumberError::NotRepresentable)),
832        ],
833    );
834
835    // min/max decimal abstract int
836    sub_test(
837        "0 9223372036854775807 9223372036854775808",
838        &[
839            Token::Number(Ok(Number::AbstractInt(0))),
840            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
841            Token::Number(Err(NumberError::NotRepresentable)),
842        ],
843    );
844
845    // min/max hexadecimal abstract int
846    sub_test(
847        "0 0x7fffffffffffffff 0x8000000000000000",
848        &[
849            Token::Number(Ok(Number::AbstractInt(0))),
850            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
851            Token::Number(Err(NumberError::NotRepresentable)),
852        ],
853    );
854
855    /// ≈ 2^-126 * 2^−23 (= 2^−149)
856    const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45;
857    /// ≈ 2^-126 * (1 − 2^−23)
858    const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38;
859    /// ≈ 2^-126
860    const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE;
861    /// ≈ 1 − 2^−24
862    const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994;
863    /// ≈ 1 + 2^−23
864    const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001;
865    /// ≈ 2^127 * (2 − 2^−23)
866    const LARGEST_NORMAL_F32: f32 = f32::MAX;
867
868    // decimal floating point
869    sub_test(
870        "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f 3.40282347e+38f",
871        &[
872            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
873            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
874            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
875            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
876            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
877            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
878        ],
879    );
880    sub_test(
881        "3.40282367e+38f",
882        &[
883            Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128
884        ],
885    );
886
887    // hexadecimal floating point
888    sub_test(
889        "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f 0xFFFFFFp+104f",
890        &[
891            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
892            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
893            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
894            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
895            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
896            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
897        ],
898    );
899    sub_test(
900        "0x1p128f 0x1.000001p0f",
901        &[
902            Token::Number(Err(NumberError::NotRepresentable)), // = 2^128
903            Token::Number(Err(NumberError::NotRepresentable)),
904        ],
905    );
906}
907
#[test]
fn double_floats() {
    // The `lf` suffix lexes as an `f64` literal (`Number::F64`), in both
    // hexadecimal and decimal forms. A bare `l` is not a valid suffix, so
    // `10l` lexes as the abstract int `10` followed by the word `l`.
    sub_test(
        "0x1.2p4lf 0x1p8lf 0.0625lf 625e-4lf 10lf 10l",
        &[
            Token::Number(Ok(Number::F64(18.0))), // 0x1.2 * 2^4
            Token::Number(Ok(Number::F64(256.0))), // 2^8
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(0.0625))), // 625 * 10^-4
            Token::Number(Ok(Number::F64(10.0))),
            Token::Number(Ok(Number::AbstractInt(10))),
            Token::Word("l"),
        ],
    )
}
923
#[test]
fn test_tokens() {
    sub_test("id123_OK", &[Token::Word("id123_OK")]);
    // A word may not start with a digit, so the number and the identifier
    // split into separate tokens.
    sub_test(
        "92No",
        &[
            Token::Number(Ok(Number::AbstractInt(92))),
            Token::Word("No"),
        ],
    );
    // A type suffix terminates the numeric literal; lexing resumes
    // immediately after it.
    sub_test(
        "2u3o",
        &[
            Token::Number(Ok(Number::U32(2))),
            Token::Number(Ok(Number::AbstractInt(3))),
            Token::Word("o"),
        ],
    );
    sub_test(
        "2.4f44po",
        &[
            Token::Number(Ok(Number::F32(2.4))),
            Token::Number(Ok(Number::AbstractInt(44))),
            Token::Word("po"),
        ],
    );
    // Identifiers may contain non-ASCII code points from many scripts.
    sub_test(
        "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ",
        &[
            Token::Word("Δέλτα"),
            Token::Word("réflexion"),
            Token::Word("Кызыл"),
            Token::Word("𐰓𐰏𐰇"),
            Token::Word("朝焼け"),
            Token::Word("سلام"),
            Token::Word("검정"),
            Token::Word("שָׁלוֹם"),
            Token::Word("गुलाबी"),
            Token::Word("փիրուզ"),
        ],
    );
    sub_test("æNoø", &[Token::Word("æNoø")]);
    // `¾` is not a valid identifier character, so it lexes as `Unknown`.
    sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]);
    sub_test("No好", &[Token::Word("No好")]);
    sub_test("_No", &[Token::Word("_No")]);

    // Mixed `*` / `/` run: block comments (including nested ones) are
    // consumed, `/=` is a compound assignment, and `/*****/` begins with
    // `/**` so it is reported as a doc comment when those are requested.
    sub_test_with_and_without_doc_comments(
        "*/*/***/*//=/*****//",
        &[
            Token::Operation('*'),
            Token::AssignmentOperation('/'),
            Token::DocComment("/*****/"),
            Token::Operation('/'),
        ],
    );

    // Type suffixes are only allowed on hex float literals
    // if you provided an exponent.
    sub_test(
        "0x1.2f 0x1.2f 0x1.2h 0x1.2H 0x1.2lf",
        &[
            // The 'f' suffixes are taken as a hex digit:
            // the fractional part is 0x2f / 256.
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            // `h`, `H`, and `lf` are not hex digits, so the literal ends at
            // `0x1.2` and the would-be suffix lexes as a separate word.
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("h"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("H"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("lf"),
        ],
    )
}
998
#[test]
fn test_variable_decl() {
    // `<` / `>` around template arguments (address space, sampled type) must
    // come out as TemplateArgsStart/End, not as Paren or comparison tokens,
    // regardless of surrounding whitespace.
    sub_test(
        "@group(0 ) var< uniform> texture:   texture_multisampled_2d <f32 >;",
        &[
            Token::Attribute,
            Token::Word("group"),
            Token::Paren('('),
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Paren(')'),
            Token::Word("var"),
            Token::TemplateArgsStart,
            Token::Word("uniform"),
            Token::TemplateArgsEnd,
            Token::Word("texture"),
            Token::Separator(':'),
            Token::Word("texture_multisampled_2d"),
            Token::TemplateArgsStart,
            Token::Word("f32"),
            Token::TemplateArgsEnd,
            Token::Separator(';'),
        ],
    );
    // Comma-separated template arguments within a single list.
    sub_test(
        "var<storage,read_write> buffer: array<u32>;",
        &[
            Token::Word("var"),
            Token::TemplateArgsStart,
            Token::Word("storage"),
            Token::Separator(','),
            Token::Word("read_write"),
            Token::TemplateArgsEnd,
            Token::Word("buffer"),
            Token::Separator(':'),
            Token::Word("array"),
            Token::TemplateArgsStart,
            Token::Word("u32"),
            Token::TemplateArgsEnd,
            Token::Separator(';'),
        ],
    );
}
1041
#[test]
fn test_template_list() {
    // Exercises WGSL's template-list discovery algorithm: deciding whether
    // `<` / `>` are template brackets, comparison/shift operators, or plain
    // parens. See https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery

    // `||` ends a pending template candidate, so these `<` `>` are
    // comparisons (lexed as Paren — see `Token::Paren` docs).
    sub_test(
        "A<B||C>D",
        &[
            Token::Word("A"),
            Token::Paren('<'),
            Token::Word("B"),
            Token::LogicalOperation('|'),
            Token::Word("C"),
            Token::Paren('>'),
            Token::Word("D"),
        ],
    );
    // A template list nested inside a parenthesized expression.
    sub_test(
        "A(B<C,D>(E))",
        &[
            Token::Word("A"),
            Token::Paren('('),
            Token::Word("B"),
            Token::TemplateArgsStart,
            Token::Word("C"),
            Token::Separator(','),
            Token::Word("D"),
            Token::TemplateArgsEnd,
            Token::Paren('('),
            Token::Word("E"),
            Token::Paren(')'),
            Token::Paren(')'),
        ],
    );
    // Inside the nested parens, `A>B` is a comparison; the final `>` still
    // closes the outer template list.
    sub_test(
        "array<i32,select(2,3,A>B)>",
        &[
            Token::Word("array"),
            Token::TemplateArgsStart,
            Token::Word("i32"),
            Token::Separator(','),
            Token::Word("select"),
            Token::Paren('('),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(','),
            Token::Number(Ok(Number::AbstractInt(3))),
            Token::Separator(','),
            Token::Word("A"),
            Token::Paren('>'),
            Token::Word("B"),
            Token::Paren(')'),
            Token::TemplateArgsEnd,
        ],
    );
    // `]` cancels the template candidate opened by `<`, so both angle
    // brackets are comparisons here.
    sub_test(
        "A[B<C]>D",
        &[
            Token::Word("A"),
            Token::Paren('['),
            Token::Word("B"),
            Token::Paren('<'),
            Token::Word("C"),
            Token::Paren(']'),
            Token::Paren('>'),
            Token::Word("D"),
        ],
    );
    // `<<` is always a shift operator, never two template openers.
    sub_test(
        "A<B<<C>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Word("B"),
            Token::ShiftOperation('<'),
            Token::Word("C"),
            Token::TemplateArgsEnd,
        ],
    );
    // Parenthesized `B>=C` keeps `>=` as a comparison inside the template.
    sub_test(
        "A<(B>=C)>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Paren('('),
            Token::Word("B"),
            Token::LogicalOperation('>'),
            Token::Word("C"),
            Token::Paren(')'),
            Token::TemplateArgsEnd,
        ],
    );
    // When a pending template candidate exists, `>=` splits: the `>`
    // closes the template list and `=` remains as assignment.
    sub_test(
        "A<B>=C>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Word("B"),
            Token::TemplateArgsEnd,
            Token::Operation('='),
            Token::Word("C"),
            Token::Paren('>'),
        ],
    );
}
1143
#[test]
fn test_comments() {
    // Ordinary (non-doc) line and block comments produce no tokens at all.
    sub_test("// Single comment", &[]);

    sub_test(
        "/* multi
    line
    comment */",
        &[],
    );
    sub_test(
        "/* multi
    line
    comment */
    // and another",
        &[],
    );
}
1162
#[test]
fn test_doc_comments() {
    // `///` and `/** */` comments are emitted as `DocComment` tokens when
    // doc-comment lexing is enabled; the token text is the full comment,
    // delimiters included.
    sub_test_with_and_without_doc_comments(
        "/// Single comment",
        &[Token::DocComment("/// Single comment")],
    );

    sub_test_with_and_without_doc_comments(
        "/** multi
    line
    comment */",
        &[Token::DocComment(
            "/** multi
    line
    comment */",
        )],
    );
    // Consecutive doc comments become separate tokens, in source order.
    sub_test_with_and_without_doc_comments(
        "/** multi
    line
    comment */
    /// and another",
        &[
            Token::DocComment(
                "/** multi
    line
    comment */",
            ),
            Token::DocComment("/// and another"),
        ],
    );
}
1195
#[test]
fn test_doc_comment_nested() {
    // Block doc comments nest: the token extends to the `*/` matching the
    // outermost `/**`, and lexing continues normally afterwards.
    sub_test_with_and_without_doc_comments(
        "/**
    a comment with nested one /**
        nested comment
    */
    */
    const a : i32 = 2;",
        &[
            Token::DocComment(
                "/**
    a comment with nested one /**
        nested comment
    */
    */",
            ),
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1223
#[test]
fn test_doc_comment_long_character() {
    // Doc comments containing multi-byte UTF-8 characters (Greek letters,
    // box-drawing runs, mathematical alphanumerics) must be captured intact
    // and must not confuse the lexer's position tracking for what follows.
    sub_test_with_and_without_doc_comments(
        "/// π/2
        ///     D(𝐡) = ───────────────────────────────────────────────────
///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`
    const a : i32 = 2;",
        &[
            Token::DocComment("/// π/2"),
            Token::DocComment("///     D(𝐡) = ───────────────────────────────────────────────────"),
            Token::DocComment("///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`"),
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1245
#[test]
fn test_doc_comments_module() {
    // `//!` and `/*! */` are module-level doc comments. Interleaved item doc
    // comments (`///`), plain comments, and blank lines don't stop the lexer
    // from emitting further `ModuleDocComment` tokens; note the lexer emits
    // one even after `const` — presumably rejecting a module comment that
    // follows real tokens is the parser's job, not the lexer's (NOTE(review):
    // confirm against the parser).
    sub_test_with_and_without_doc_comments(
        "//! Comment Module
        //! Another one.
        /*! Different module comment */
        /// Trying to break module comment
        // Trying to break module comment again
        //! After a regular comment is ok.
        /*! Different module comment again */

        //! After a break is supported.
        const
        //! After anything else is not.",
        &[
            Token::ModuleDocComment("//! Comment Module"),
            Token::ModuleDocComment("//! Another one."),
            Token::ModuleDocComment("/*! Different module comment */"),
            Token::DocComment("/// Trying to break module comment"),
            // The plain `//` comment above produces no token at all.
            Token::ModuleDocComment("//! After a regular comment is ok."),
            Token::ModuleDocComment("/*! Different module comment again */"),
            Token::ModuleDocComment("//! After a break is supported."),
            Token::Word("const"),
            Token::ModuleDocComment("//! After anything else is not."),
        ],
    );
}