naga/front/wgsl/parse/
lexer.rs

1use super::{number::consume_number, Error, ExpectedToken, Result};
2use crate::front::wgsl::error::NumberError;
3use crate::front::wgsl::parse::directive::enable_extension::{
4    EnableExtensions, ImplementedEnableExtension,
5};
6use crate::front::wgsl::parse::Number;
7use crate::Span;
8
9use alloc::{boxed::Box, vec::Vec};
10
11pub type TokenSpan<'a> = (Token<'a>, Span);
12
13#[derive(Copy, Clone, Debug, PartialEq)]
14pub enum Token<'a> {
15    /// A separator character: `:;,`, and `.` when not part of a numeric
16    /// literal.
17    Separator(char),
18
19    /// A parenthesis-like character: `()[]{}`, and also `<>`.
20    ///
21    /// Note that `<>` representing template argument brackets are distinguished
22    /// using WGSL's [template list discovery algorithm][tlda], and are returned
23    /// as [`Token::TemplateArgsStart`] and [`Token::TemplateArgsEnd`]. That is,
24    /// we use `Paren` for `<>` when they are *not* parens.
25    ///
26    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
27    Paren(char),
28
29    /// The attribute introduction character `@`.
30    Attribute,
31
32    /// A numeric literal, either integral or floating-point, including any
33    /// type suffix.
34    Number(core::result::Result<Number, NumberError>),
35
36    /// An identifier, possibly a reserved word.
37    Word(&'a str),
38
39    /// A miscellaneous single-character operator, like an arithmetic unary or
40    /// binary operator. This includes `=`, for assignment and initialization.
41    Operation(char),
42
43    /// Certain multi-character logical operators: `!=`, `==`, `&&`,
44    /// `||`, `<=` and `>=`. The value gives the operator's first
45    /// character.
46    ///
47    /// For `<` and `>` operators, see [`Token::Paren`].
48    LogicalOperation(char),
49
50    /// A shift operator: `>>` or `<<`.
51    ShiftOperation(char),
52
53    /// A compound assignment operator like `+=`.
54    ///
55    /// When the given character is `<` or `>`, those represent the left shift
56    /// and right shift assignment operators, `<<=` and `>>=`.
57    AssignmentOperation(char),
58
59    /// The `++` operator.
60    IncrementOperation,
61
62    /// The `--` operator.
63    DecrementOperation,
64
65    /// The `->` token.
66    Arrow,
67
68    /// A `<` representing the start of a template argument list, according to
69    /// WGSL's [template list discovery algorithm][tlda].
70    ///
71    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
72    TemplateArgsStart,
73
74    /// A `>` representing the end of a template argument list, according to
75    /// WGSL's [template list discovery algorithm][tlda].
76    ///
77    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
78    TemplateArgsEnd,
79
80    /// A character that does not represent a legal WGSL token.
81    Unknown(char),
82
83    /// Comment or whitespace.
84    Trivia,
85
86    /// A doc comment, beginning with `///` or `/**`.
87    DocComment(&'a str),
88
89    /// A module-level doc comment, beginning with `//!` or `/*!`.
90    ModuleDocComment(&'a str),
91
92    /// The end of the input.
93    End,
94}
95
/// Split `input` into its longest prefix whose characters all satisfy `what`
/// and the remaining text.
///
/// Either half may be empty.
fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) {
    // Byte length of the leading run of accepted characters; summing whole
    // characters keeps the split on a `char` boundary.
    let accepted_len: usize = input
        .chars()
        .take_while(|&c| what(c))
        .map(char::len_utf8)
        .sum();
    input.split_at(accepted_len)
}
100
/// A `<` token that may still prove to be the start of a template argument
/// list, tracked by `discover_template_lists` until it is matched or
/// discarded.
struct UnclosedCandidate {
    /// Position of the `<` token within the token buffer being filled.
    index: usize,
    /// Nesting depth of `()` / `[]` brackets at the point the `<` was seen.
    depth: usize,
}
105
106/// Produce at least one token, distinguishing [template lists] from other uses
107/// of `<` and `>`.
108///
109/// Consume one or more tokens from `input` and store them in `tokens`, updating
110/// `input` to refer to the remaining text. Apply WGSL's [template list
111/// discovery algorithm] to decide what sort of tokens `<` and `>` characters in
112/// the input actually represent.
113///
114/// Store the tokens in `tokens` in the *reverse* of the order they appear in
115/// the text, such that the caller can pop from the end of the vector to see the
116/// tokens in textual order.
117///
118/// The `tokens` vector must be empty on entry. The idea is for the caller to
119/// use it as a buffer of unconsumed tokens, and call this function to refill it
120/// when it's empty.
121///
122/// The `source` argument must be the whole original source code, used to
123/// compute spans.
124///
125/// If `ignore_doc_comments` is true, then doc comments are returned as
126/// [`Token::Trivia`], like ordinary comments.
127///
128/// [template lists]: https://gpuweb.github.io/gpuweb/wgsl/#template-lists-sec
129/// [template list discovery algorithm]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
130fn discover_template_lists<'a>(
131    tokens: &mut Vec<(TokenSpan<'a>, &'a str)>,
132    source: &'a str,
133    mut input: &'a str,
134    ignore_doc_comments: bool,
135) {
136    assert!(tokens.is_empty());
137
138    let mut looking_for_template_start = false;
139    let mut pending: Vec<UnclosedCandidate> = Vec::new();
140
141    // Current nesting depth of `()` and `[]` brackets. (`{}` brackets
142    // exit all template list processing.)
143    let mut depth = 0;
144
145    fn pop_until(pending: &mut Vec<UnclosedCandidate>, depth: usize) {
146        while pending
147            .last()
148            .map(|candidate| candidate.depth >= depth)
149            .unwrap_or(false)
150        {
151            pending.pop();
152        }
153    }
154
155    loop {
156        // Decide whether `consume_token` should treat a `>` character as
157        // `TemplateArgsEnd`, without considering the characters that follow.
158        //
159        // This condition matches the one that determines whether the spec's
160        // template list discovery algorithm looks past a `>` character for a
161        // `=`. By passing this flag to `consume_token`, we ensure it follows
162        // that behavior.
163        let waiting_for_template_end = pending
164            .last()
165            .is_some_and(|candidate| candidate.depth == depth);
166
167        // Ask `consume_token` for the next token and add it to `tokens`, along
168        // with its span.
169        //
170        // This means that `<` enters the buffer as `Token::Paren('<')`, the
171        // ordinary comparison operator. We'll change that to
172        // `Token::TemplateArgsStart` later if appropriate.
173        let (token, rest) = consume_token(input, waiting_for_template_end, ignore_doc_comments);
174        let span = Span::from(source.len() - input.len()..source.len() - rest.len());
175        tokens.push(((token, span), rest));
176        input = rest;
177
178        // Since `consume_token` treats `<<=`, `<<` and `<=` as operators, not
179        // `Token::Paren`, that takes care of the WGSL algorithm's post-'<' lookahead
180        // for us.
181        match token {
182            Token::Word(_) => {
183                looking_for_template_start = true;
184                continue;
185            }
186            Token::Trivia | Token::DocComment(_) | Token::ModuleDocComment(_)
187                if looking_for_template_start =>
188            {
189                continue;
190            }
191            Token::Paren('<') if looking_for_template_start => {
192                pending.push(UnclosedCandidate {
193                    index: tokens.len() - 1,
194                    depth,
195                });
196            }
197            Token::TemplateArgsEnd => {
198                // The `consume_token` function only returns `TemplateArgsEnd`
199                // if `waiting_for_template_end` is true, so we know `pending`
200                // has a top entry at the appropriate depth.
201                //
202                // Find the matching `<` token and change its type to
203                // `TemplateArgsStart`.
204                let candidate = pending.pop().unwrap();
205                let &mut ((ref mut token, _), _) = tokens.get_mut(candidate.index).unwrap();
206                *token = Token::TemplateArgsStart;
207            }
208            Token::Paren('(' | '[') => {
209                depth += 1;
210            }
211            Token::Paren(')' | ']') => {
212                pop_until(&mut pending, depth);
213                depth = depth.saturating_sub(1);
214            }
215            Token::Operation('=') | Token::Separator(':' | ';') | Token::Paren('{') => {
216                pending.clear();
217                depth = 0;
218            }
219            Token::LogicalOperation('&') | Token::LogicalOperation('|') => {
220                pop_until(&mut pending, depth);
221            }
222            Token::End => break,
223            _ => {}
224        }
225
226        looking_for_template_start = false;
227
228        // The WGSL spec's template list discovery algorithm processes the
229        // entire source at once, but Naga would rather limit its lookahead to
230        // the actual text that could possibly be a template parameter list.
231        // This is usually less than a line.
232        if pending.is_empty() {
233            break;
234        }
235    }
236
237    tokens.reverse();
238}
239
240/// Return the token at the start of `input`.
241///
242/// The `waiting_for_template_end` flag enables some special handling to help out
243/// `discover_template_lists`:
244///
245/// - If `waiting_for_template_end` is `true`, then return text starting with
246///   '>` as [`Token::TemplateArgsEnd`] and consume only the `>` character,
247///   regardless of what characters follow it. This is required by the [template
248///   list discovery algorithm][tlda] when the `>` would end a template argument list.
249///
250/// - If `waiting_for_template_end` is false, recognize multi-character tokens
251///   beginning with `>` as usual.
252///
253/// If `ignore_doc_comments` is true, then doc comments are returned as
254/// [`Token::Trivia`], like ordinary comments.
255///
256/// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
257fn consume_token(
258    input: &str,
259    waiting_for_template_end: bool,
260    ignore_doc_comments: bool,
261) -> (Token<'_>, &str) {
262    let mut chars = input.chars();
263    let cur = match chars.next() {
264        Some(c) => c,
265        None => return (Token::End, ""),
266    };
267    match cur {
268        ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()),
269        '.' => {
270            let og_chars = chars.as_str();
271            match chars.next() {
272                Some('0'..='9') => consume_number(input),
273                _ => (Token::Separator(cur), og_chars),
274            }
275        }
276        '@' => (Token::Attribute, chars.as_str()),
277        '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()),
278        '<' | '>' => {
279            let og_chars = chars.as_str();
280            if cur == '>' && waiting_for_template_end {
281                return (Token::TemplateArgsEnd, og_chars);
282            }
283            match chars.next() {
284                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
285                Some(c) if c == cur => {
286                    let og_chars = chars.as_str();
287                    match chars.next() {
288                        Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
289                        _ => (Token::ShiftOperation(cur), og_chars),
290                    }
291                }
292                _ => (Token::Paren(cur), og_chars),
293            }
294        }
295        '0'..='9' => consume_number(input),
296        '/' => {
297            let og_chars = chars.as_str();
298            match chars.next() {
299                Some('/') => {
300                    let mut input_chars = input.char_indices();
301                    let doc_comment_end = input_chars
302                        .find_map(|(index, c)| is_comment_end(c).then_some(index))
303                        .unwrap_or(input.len());
304                    let token = match chars.next() {
305                        Some('/') if !ignore_doc_comments => {
306                            Token::DocComment(&input[..doc_comment_end])
307                        }
308                        Some('!') if !ignore_doc_comments => {
309                            Token::ModuleDocComment(&input[..doc_comment_end])
310                        }
311                        _ => Token::Trivia,
312                    };
313                    (token, input_chars.as_str())
314                }
315                Some('*') => {
316                    let next_c = chars.next();
317
318                    enum CommentType {
319                        Doc,
320                        ModuleDoc,
321                        Normal,
322                    }
323                    let comment_type = match next_c {
324                        Some('*') if !ignore_doc_comments => CommentType::Doc,
325                        Some('!') if !ignore_doc_comments => CommentType::ModuleDoc,
326                        _ => CommentType::Normal,
327                    };
328
329                    let mut depth = 1;
330                    let mut prev = next_c;
331
332                    for c in &mut chars {
333                        match (prev, c) {
334                            (Some('*'), '/') => {
335                                prev = None;
336                                depth -= 1;
337                                if depth == 0 {
338                                    let rest = chars.as_str();
339                                    let token = match comment_type {
340                                        CommentType::Doc => {
341                                            let doc_comment_end = input.len() - rest.len();
342                                            Token::DocComment(&input[..doc_comment_end])
343                                        }
344                                        CommentType::ModuleDoc => {
345                                            let doc_comment_end = input.len() - rest.len();
346                                            Token::ModuleDocComment(&input[..doc_comment_end])
347                                        }
348                                        CommentType::Normal => Token::Trivia,
349                                    };
350                                    return (token, rest);
351                                }
352                            }
353                            (Some('/'), '*') => {
354                                prev = None;
355                                depth += 1;
356                            }
357                            _ => {
358                                prev = Some(c);
359                            }
360                        }
361                    }
362
363                    (Token::End, "")
364                }
365                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
366                _ => (Token::Operation(cur), og_chars),
367            }
368        }
369        '-' => {
370            let og_chars = chars.as_str();
371            match chars.next() {
372                Some('>') => (Token::Arrow, chars.as_str()),
373                Some('-') => (Token::DecrementOperation, chars.as_str()),
374                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
375                _ => (Token::Operation(cur), og_chars),
376            }
377        }
378        '+' => {
379            let og_chars = chars.as_str();
380            match chars.next() {
381                Some('+') => (Token::IncrementOperation, chars.as_str()),
382                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
383                _ => (Token::Operation(cur), og_chars),
384            }
385        }
386        '*' | '%' | '^' => {
387            let og_chars = chars.as_str();
388            match chars.next() {
389                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
390                _ => (Token::Operation(cur), og_chars),
391            }
392        }
393        '~' => (Token::Operation(cur), chars.as_str()),
394        '=' | '!' => {
395            let og_chars = chars.as_str();
396            match chars.next() {
397                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
398                _ => (Token::Operation(cur), og_chars),
399            }
400        }
401        '&' | '|' => {
402            let og_chars = chars.as_str();
403            match chars.next() {
404                Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()),
405                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
406                _ => (Token::Operation(cur), og_chars),
407            }
408        }
409        _ if is_blankspace(cur) => {
410            let (_, rest) = consume_any(input, is_blankspace);
411            (Token::Trivia, rest)
412        }
413        _ if is_word_start(cur) => {
414            let (word, rest) = consume_any(input, is_word_part);
415            (Token::Word(word), rest)
416        }
417        _ => (Token::Unknown(cur), chars.as_str()),
418    }
419}
420
/// Report whether `c` is a line break, which terminates a line comment.
///
/// These are the Unicode Pattern_White_Space characters other than U+0020,
/// U+0009, U+200E and U+200F.
/// <https://www.w3.org/TR/WGSL/#line-break>
const fn is_comment_end(c: char) -> bool {
    matches!(
        c,
        '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
430
/// Report whether `c` is blankspace, i.e. a Unicode Pattern_White_Space
/// character.
const fn is_blankspace(c: char) -> bool {
    matches!(
        c,
        '\u{0020}'
            | '\u{0009}'..='\u{000d}'
            | '\u{0085}'
            | '\u{200e}'
            | '\u{200f}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
444
445/// Returns whether or not a char is a word start (Unicode XID_Start + '_')
446fn is_word_start(c: char) -> bool {
447    c == '_' || unicode_ident::is_xid_start(c)
448}
449
450/// Returns whether or not a char is a word part (Unicode XID_Continue)
451fn is_word_part(c: char) -> bool {
452    unicode_ident::is_xid_continue(c)
453}
454
455pub(in crate::front::wgsl) struct Lexer<'a> {
456    /// The remaining unconsumed input.
457    input: &'a str,
458
459    /// The full original source code.
460    ///
461    /// We compare `input` against this to compute the lexer's current offset in
462    /// the source.
463    pub(in crate::front::wgsl) source: &'a str,
464
465    /// The byte offset of the end of the most recently returned non-trivia
466    /// token.
467    ///
468    /// This is consulted by the `span_from` function, for finding the
469    /// end of the span for larger structures like expressions or
470    /// statements.
471    last_end_offset: usize,
472
473    /// A stack of unconsumed tokens to which template list discovery has been
474    /// applied.
475    ///
476    /// This is a stack: the next token is at the *end* of the vector, not the
477    /// start. So tokens appear here in the reverse of the order they appear in
478    /// the source.
479    ///
480    /// This doesn't contain the whole source, only those tokens produced by
481    /// [`discover_template_lists`]'s look-ahead, or that have been produced by
482    /// other look-ahead functions like `peek` and `next_if`. When this is empty,
483    /// we call [`discover_template_lists`] to get more.
484    tokens: Vec<(TokenSpan<'a>, &'a str)>,
485
486    /// Whether or not to ignore doc comments.
487    /// If `true`, doc comments are treated as [`Token::Trivia`].
488    ignore_doc_comments: bool,
489
490    /// The set of [enable-extensions] present in the module, determined in a pre-pass.
491    ///
492    /// [enable-extensions]: https://gpuweb.github.io/gpuweb/wgsl/#enable-extensions-sec
493    pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
494}
495
496impl<'a> Lexer<'a> {
497    pub(in crate::front::wgsl) const fn new(input: &'a str, ignore_doc_comments: bool) -> Self {
498        Lexer {
499            input,
500            source: input,
501            last_end_offset: 0,
502            tokens: Vec::new(),
503            enable_extensions: EnableExtensions::empty(),
504            ignore_doc_comments,
505        }
506    }
507
508    /// Check that `extension` is enabled in `self`.
509    pub(in crate::front::wgsl) fn require_enable_extension(
510        &self,
511        extension: ImplementedEnableExtension,
512        span: Span,
513    ) -> Result<'static, ()> {
514        self.enable_extensions.require(extension, span)
515    }
516
517    /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed
518    ///
519    /// # Examples
520    /// ```ignore
521    /// let lexer = Lexer::new("5");
522    /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal);
523    /// assert_eq!(value, 5);
524    /// ```
525    #[inline]
526    pub fn capture_span<T, E>(
527        &mut self,
528        inner: impl FnOnce(&mut Self) -> core::result::Result<T, E>,
529    ) -> core::result::Result<(T, Span), E> {
530        let start = self.current_byte_offset();
531        let res = inner(self)?;
532        let end = self.current_byte_offset();
533        Ok((res, Span::from(start..end)))
534    }
535
536    pub(in crate::front::wgsl) fn start_byte_offset(&mut self) -> usize {
537        loop {
538            // Eat all trivia because `next` doesn't eat trailing trivia.
539            let (token, rest) = consume_token(self.input, false, true);
540            if let Token::Trivia = token {
541                self.input = rest;
542            } else {
543                return self.current_byte_offset();
544            }
545        }
546    }
547
548    /// Collect all module doc comments until a non doc token is found.
549    pub(in crate::front::wgsl) fn accumulate_module_doc_comments(&mut self) -> Vec<&'a str> {
550        let mut doc_comments = Vec::new();
551        loop {
552            // ignore blankspace
553            self.input = consume_any(self.input, is_blankspace).1;
554
555            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
556            if let Token::ModuleDocComment(doc_comment) = token {
557                self.input = rest;
558                doc_comments.push(doc_comment);
559            } else {
560                return doc_comments;
561            }
562        }
563    }
564
565    /// Collect all doc comments until a non doc token is found.
566    pub(in crate::front::wgsl) fn accumulate_doc_comments(&mut self) -> Vec<&'a str> {
567        let mut doc_comments = Vec::new();
568        loop {
569            // ignore blankspace
570            self.input = consume_any(self.input, is_blankspace).1;
571
572            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
573            if let Token::DocComment(doc_comment) = token {
574                self.input = rest;
575                doc_comments.push(doc_comment);
576            } else {
577                return doc_comments;
578            }
579        }
580    }
581
582    const fn current_byte_offset(&self) -> usize {
583        self.source.len() - self.input.len()
584    }
585
586    pub(in crate::front::wgsl) fn span_from(&self, offset: usize) -> Span {
587        Span::from(offset..self.last_end_offset)
588    }
589    pub(in crate::front::wgsl) fn span_with_start(&self, span: Span) -> Span {
590        span.until(&Span::from(0..self.last_end_offset))
591    }
592
593    /// Return the next non-whitespace token from `self`.
594    ///
595    /// Assume we are a parse state where bit shift operators may
596    /// occur, but not angle brackets.
597    #[must_use]
598    pub(in crate::front::wgsl) fn next(&mut self) -> TokenSpan<'a> {
599        self.next_impl(true)
600    }
601
602    #[cfg(test)]
603    pub fn next_with_unignored_doc_comments(&mut self) -> TokenSpan<'a> {
604        self.next_impl(false)
605    }
606
607    /// Return the next non-whitespace token from `self`, with a span.
608    fn next_impl(&mut self, ignore_doc_comments: bool) -> TokenSpan<'a> {
609        loop {
610            if self.tokens.is_empty() {
611                discover_template_lists(
612                    &mut self.tokens,
613                    self.source,
614                    self.input,
615                    ignore_doc_comments || self.ignore_doc_comments,
616                );
617            }
618            assert!(!self.tokens.is_empty());
619            let (token, rest) = self.tokens.pop().unwrap();
620
621            self.input = rest;
622            self.last_end_offset = self.current_byte_offset();
623
624            match token.0 {
625                Token::Trivia => {}
626                _ => return token,
627            }
628        }
629    }
630
631    #[must_use]
632    pub(in crate::front::wgsl) fn peek(&mut self) -> TokenSpan<'a> {
633        let input = self.input;
634        let last_end_offset = self.last_end_offset;
635        let token = self.next();
636        self.tokens.push((token, self.input));
637        self.input = input;
638        self.last_end_offset = last_end_offset;
639        token
640    }
641
642    /// If the next token matches it's consumed and true is returned
643    pub(in crate::front::wgsl) fn next_if(&mut self, what: Token<'_>) -> bool {
644        let input = self.input;
645        let last_end_offset = self.last_end_offset;
646        let token = self.next();
647        if token.0 == what {
648            true
649        } else {
650            self.tokens.push((token, self.input));
651            self.input = input;
652            self.last_end_offset = last_end_offset;
653            false
654        }
655    }
656
657    pub(in crate::front::wgsl) fn expect_span(&mut self, expected: Token<'a>) -> Result<'a, Span> {
658        let next = self.next();
659        if next.0 == expected {
660            Ok(next.1)
661        } else {
662            Err(Box::new(Error::Unexpected(
663                next.1,
664                ExpectedToken::Token(expected),
665            )))
666        }
667    }
668
669    pub(in crate::front::wgsl) fn expect(&mut self, expected: Token<'a>) -> Result<'a, ()> {
670        self.expect_span(expected)?;
671        Ok(())
672    }
673
674    pub(in crate::front::wgsl) fn next_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
675        match self.next() {
676            (Token::Word("_"), span) => Err(Box::new(Error::InvalidIdentifierUnderscore(span))),
677            (Token::Word(word), span) => {
678                if word.starts_with("__") {
679                    Err(Box::new(Error::ReservedIdentifierPrefix(span)))
680                } else {
681                    Ok((word, span))
682                }
683            }
684            (_, span) => Err(Box::new(Error::Unexpected(span, ExpectedToken::Identifier))),
685        }
686    }
687
688    pub(in crate::front::wgsl) fn next_ident(&mut self) -> Result<'a, super::ast::Ident<'a>> {
689        self.next_ident_with_span()
690            .and_then(|(word, span)| Self::word_as_ident(word, span))
691            .map(|(name, span)| super::ast::Ident { name, span })
692    }
693
694    fn word_as_ident(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
695        if crate::keywords::wgsl::RESERVED.contains(&word) {
696            Err(Box::new(Error::ReservedKeyword(span)))
697        } else {
698            Ok((word, span))
699        }
700    }
701
702    pub(in crate::front::wgsl) fn open_arguments(&mut self) -> Result<'a, ()> {
703        self.expect(Token::Paren('('))
704    }
705
706    pub(in crate::front::wgsl) fn next_argument(&mut self) -> Result<'a, bool> {
707        let paren = Token::Paren(')');
708        if self.next_if(Token::Separator(',')) {
709            Ok(!self.next_if(paren))
710        } else {
711            self.expect(paren).map(|()| false)
712        }
713    }
714}
715
/// Assert that lexing `source` yields exactly `expected_tokens`, followed by
/// the end of input, with the lexer configured to ignore doc comments.
#[cfg(test)]
#[track_caller]
fn sub_test(source: &str, expected_tokens: &[Token]) {
    let ignore_doc_comments = true;
    sub_test_with(ignore_doc_comments, source, expected_tokens);
}
721
/// Check `source` twice: once keeping doc comments, expecting
/// `expected_tokens` as given, and once ignoring them, expecting the same
/// list with the doc comment tokens removed.
#[cfg(test)]
#[track_caller]
fn sub_test_with_and_without_doc_comments(source: &str, expected_tokens: &[Token]) {
    sub_test_with(false, source, expected_tokens);

    let is_doc_comment =
        |token: &Token| matches!(*token, Token::DocComment(_) | Token::ModuleDocComment(_));
    let without_doc_comments: Vec<_> = expected_tokens
        .iter()
        .copied()
        .filter(|token| !is_doc_comment(token))
        .collect();
    sub_test_with(true, source, &without_doc_comments);
}
737
/// Lex `source` with the given `ignore_doc_comments` setting and assert that
/// the tokens produced are exactly `expected_tokens`, followed by
/// [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test_with(ignore_doc_comments: bool, source: &str, expected_tokens: &[Token]) {
    let mut lex = Lexer::new(source, ignore_doc_comments);
    for expected in expected_tokens.iter().copied() {
        let (actual, _span) = lex.next_with_unignored_doc_comments();
        assert_eq!(actual, expected);
    }
    let (last, _span) = lex.next();
    assert_eq!(last, Token::End);
}
747
748#[test]
749fn test_numbers() {
750    use half::f16;
751    // WGSL spec examples //
752
753    // decimal integer
754    sub_test(
755        "0x123 0X123u 1u 123 0 0i 0x3f",
756        &[
757            Token::Number(Ok(Number::AbstractInt(291))),
758            Token::Number(Ok(Number::U32(291))),
759            Token::Number(Ok(Number::U32(1))),
760            Token::Number(Ok(Number::AbstractInt(123))),
761            Token::Number(Ok(Number::AbstractInt(0))),
762            Token::Number(Ok(Number::I32(0))),
763            Token::Number(Ok(Number::AbstractInt(63))),
764        ],
765    );
766    // decimal floating point
767    sub_test(
768        "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h",
769        &[
770            Token::Number(Ok(Number::F32(0.))),
771            Token::Number(Ok(Number::AbstractFloat(1.))),
772            Token::Number(Ok(Number::AbstractFloat(0.01))),
773            Token::Number(Ok(Number::AbstractFloat(12.34))),
774            Token::Number(Ok(Number::F32(0.))),
775            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
776            Token::Number(Ok(Number::AbstractFloat(0.001))),
777            Token::Number(Ok(Number::AbstractFloat(43.75))),
778            Token::Number(Ok(Number::F32(16.))),
779            Token::Number(Ok(Number::AbstractFloat(0.1875))),
780            // https://github.com/gfx-rs/wgpu/issues/7046
781            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
782            Token::Number(Ok(Number::AbstractFloat(0.12109375))),
783            // https://github.com/gfx-rs/wgpu/issues/7046
784            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
785        ],
786    );
787
788    // MIN / MAX //
789
790    // min / max decimal integer
791    sub_test(
792        "0i 2147483647i 2147483648i",
793        &[
794            Token::Number(Ok(Number::I32(0))),
795            Token::Number(Ok(Number::I32(i32::MAX))),
796            Token::Number(Err(NumberError::NotRepresentable)),
797        ],
798    );
799    // min / max decimal unsigned integer
800    sub_test(
801        "0u 4294967295u 4294967296u",
802        &[
803            Token::Number(Ok(Number::U32(u32::MIN))),
804            Token::Number(Ok(Number::U32(u32::MAX))),
805            Token::Number(Err(NumberError::NotRepresentable)),
806        ],
807    );
808
809    // min / max hexadecimal signed integer
810    sub_test(
811        "0x0i 0x7FFFFFFFi 0x80000000i",
812        &[
813            Token::Number(Ok(Number::I32(0))),
814            Token::Number(Ok(Number::I32(i32::MAX))),
815            Token::Number(Err(NumberError::NotRepresentable)),
816        ],
817    );
818    // min / max hexadecimal unsigned integer
819    sub_test(
820        "0x0u 0xFFFFFFFFu 0x100000000u",
821        &[
822            Token::Number(Ok(Number::U32(u32::MIN))),
823            Token::Number(Ok(Number::U32(u32::MAX))),
824            Token::Number(Err(NumberError::NotRepresentable)),
825        ],
826    );
827
828    // min/max decimal abstract int
829    sub_test(
830        "0 9223372036854775807 9223372036854775808",
831        &[
832            Token::Number(Ok(Number::AbstractInt(0))),
833            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
834            Token::Number(Err(NumberError::NotRepresentable)),
835        ],
836    );
837
838    // min/max hexadecimal abstract int
839    sub_test(
840        "0 0x7fffffffffffffff 0x8000000000000000",
841        &[
842            Token::Number(Ok(Number::AbstractInt(0))),
843            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
844            Token::Number(Err(NumberError::NotRepresentable)),
845        ],
846    );
847
848    /// ≈ 2^-126 * 2^−23 (= 2^−149)
849    const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45;
850    /// ≈ 2^-126 * (1 − 2^−23)
851    const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38;
852    /// ≈ 2^-126
853    const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE;
854    /// ≈ 1 − 2^−24
855    const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994;
856    /// ≈ 1 + 2^−23
857    const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001;
858    /// ≈ 2^127 * (2 − 2^−23)
859    const LARGEST_NORMAL_F32: f32 = f32::MAX;
860
861    // decimal floating point
862    sub_test(
863        "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f 3.40282347e+38f",
864        &[
865            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
866            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
867            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
868            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
869            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
870            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
871        ],
872    );
873    sub_test(
874        "3.40282367e+38f",
875        &[
876            Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128
877        ],
878    );
879
880    // hexadecimal floating point
881    sub_test(
882        "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f 0xFFFFFFp+104f",
883        &[
884            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
885            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
886            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
887            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
888            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
889            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
890        ],
891    );
892    sub_test(
893        "0x1p128f 0x1.000001p0f",
894        &[
895            Token::Number(Err(NumberError::NotRepresentable)), // = 2^128
896            Token::Number(Err(NumberError::NotRepresentable)),
897        ],
898    );
899}
900
/// Checks that literals carrying the `lf` suffix, in both hexadecimal and
/// decimal notation, are lexed as `Number::F64`, and that a trailing `l` on
/// its own is not consumed as part of the literal.
#[test]
fn double_floats() {
    let source = "0x1.2p4lf 0x1p8lf 0.0625lf 625e-4lf 10lf 10l";
    let expected = [
        Token::Number(Ok(Number::F64(18.0))),
        Token::Number(Ok(Number::F64(256.0))),
        Token::Number(Ok(Number::F64(0.0625))),
        Token::Number(Ok(Number::F64(0.0625))),
        Token::Number(Ok(Number::F64(10.0))),
        // `10l` is expected to split into the integer `10` and the word `l`.
        Token::Number(Ok(Number::AbstractInt(10))),
        Token::Word("l"),
    ];

    sub_test(source, &expected);
}
916
/// Covers general tokenization: identifiers (ASCII and non-ASCII), numeric
/// literals immediately followed by letters, a dense run of `*` and `/`
/// characters, and hexadecimal float literals followed by would-be type
/// suffixes.
#[test]
fn test_tokens() {
    // Letters, digits and underscores together form a single word.
    let plain_ident = [Token::Word("id123_OK")];
    sub_test("id123_OK", &plain_ident);

    // Letters directly after an integer literal start a separate word.
    let int_then_word = [
        Token::Number(Ok(Number::AbstractInt(92))),
        Token::Word("No"),
    ];
    sub_test("92No", &int_then_word);

    // The `u` suffix closes the first literal; the `3` opens a second one.
    let after_u_suffix = [
        Token::Number(Ok(Number::U32(2))),
        Token::Number(Ok(Number::AbstractInt(3))),
        Token::Word("o"),
    ];
    sub_test("2u3o", &after_u_suffix);

    // Same shape as above, with the `f` suffix.
    let after_f_suffix = [
        Token::Number(Ok(Number::F32(2.4))),
        Token::Number(Ok(Number::AbstractInt(44))),
        Token::Word("po"),
    ];
    sub_test("2.4f44po", &after_f_suffix);

    // Space-separated words written in a variety of scripts; each one is
    // expected back as a single `Word` token.
    let multilingual = [
        "Δέλτα",
        "réflexion",
        "Кызыл",
        "𐰓𐰏𐰇",
        "朝焼け",
        "سلام",
        "검정",
        "שָׁלוֹם",
        "गुलाबी",
        "փիրուզ",
    ]
    .map(Token::Word);
    sub_test(
        "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ",
        &multilingual,
    );

    // Non-ASCII letters may begin and end a word.
    let non_ascii_edges = [Token::Word("æNoø")];
    sub_test("æNoø", &non_ascii_edges);

    // `¾` is not taken as part of the word; it is expected as `Unknown`.
    let word_then_unknown = [Token::Word("No"), Token::Unknown('¾')];
    sub_test("No¾", &word_then_unknown);

    // A CJK character continues the word.
    let cjk_tail = [Token::Word("No好")];
    sub_test("No好", &cjk_tail);

    // A leading underscore is part of the word.
    let underscore_head = [Token::Word("_No")];
    sub_test("_No", &underscore_head);

    // Operators packed tightly against comment delimiters.
    let stars_and_slashes = [
        Token::Operation('*'),
        Token::AssignmentOperation('/'),
        Token::DocComment("/*****/"),
        Token::Operation('/'),
    ];
    sub_test_with_and_without_doc_comments("*/*/***/*//=/*****//", &stars_and_slashes);

    // A hex float literal only accepts a type suffix when an exponent is
    // present. None of the literals below has one.
    //
    // NOTE(review): the first two literals in the input are identical
    // (`0x1.2f` twice); given the `h`/`H` pair that follows, the second was
    // possibly meant to be `0x1.2F` — confirm before changing.
    let hex_floats_without_exponent = [
        // Here the trailing `f` is read as one more hex digit, so the
        // fractional part is 0x2f / 256.
        Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
        Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
        // The remaining would-be suffixes come back as separate words.
        Token::Number(Ok(Number::AbstractFloat(1.125))),
        Token::Word("h"),
        Token::Number(Ok(Number::AbstractFloat(1.125))),
        Token::Word("H"),
        Token::Number(Ok(Number::AbstractFloat(1.125))),
        Token::Word("lf"),
    ];
    sub_test(
        "0x1.2f 0x1.2f 0x1.2h 0x1.2H 0x1.2lf",
        &hex_floats_without_exponent,
    );
}
991
/// Tokenizes two global `var` declarations and checks that the `<`/`>`
/// around the `var` qualifiers and around the type arguments are reported as
/// template-argument brackets.
#[test]
fn test_variable_decl() {
    use Token::{Attribute, Paren, Separator, TemplateArgsEnd, TemplateArgsStart, Word};

    // Irregular whitespace inside and around the brackets must not change
    // the resulting token stream.
    let texture_decl = [
        Attribute,
        Word("group"),
        Paren('('),
        Token::Number(Ok(Number::AbstractInt(0))),
        Paren(')'),
        Word("var"),
        TemplateArgsStart,
        Word("uniform"),
        TemplateArgsEnd,
        Word("texture"),
        Separator(':'),
        Word("texture_multisampled_2d"),
        TemplateArgsStart,
        Word("f32"),
        TemplateArgsEnd,
        Separator(';'),
    ];
    sub_test(
        "@group(0 ) var< uniform> texture:   texture_multisampled_2d <f32 >;",
        &texture_decl,
    );

    // A template list holding two comma-separated arguments.
    let buffer_decl = [
        Word("var"),
        TemplateArgsStart,
        Word("storage"),
        Separator(','),
        Word("read_write"),
        TemplateArgsEnd,
        Word("buffer"),
        Separator(':'),
        Word("array"),
        TemplateArgsStart,
        Word("u32"),
        TemplateArgsEnd,
        Separator(';'),
    ];
    sub_test("var<storage,read_write> buffer: array<u32>;", &buffer_decl);
}
1034
/// Checks when `<` and `>` are reported as template-argument brackets
/// (`TemplateArgsStart` / `TemplateArgsEnd`) and when they stay ordinary
/// `Paren` tokens or parts of longer operators.
#[test]
fn test_template_list() {
    use Token::{
        LogicalOperation, Operation, Paren, Separator, ShiftOperation, TemplateArgsEnd,
        TemplateArgsStart, Word,
    };

    // With `||` between them, the angle brackets stay plain `Paren`s.
    let split_by_logical_or = [
        Word("A"),
        Paren('<'),
        Word("B"),
        LogicalOperation('|'),
        Word("C"),
        Paren('>'),
        Word("D"),
    ];
    sub_test("A<B||C>D", &split_by_logical_or);

    // A template list nested inside a parenthesized argument list.
    let inside_call = [
        Word("A"),
        Paren('('),
        Word("B"),
        TemplateArgsStart,
        Word("C"),
        Separator(','),
        Word("D"),
        TemplateArgsEnd,
        Paren('('),
        Word("E"),
        Paren(')'),
        Paren(')'),
    ];
    sub_test("A(B<C,D>(E))", &inside_call);

    // The `>` inside the parentheses is a plain `Paren`; only the final `>`
    // closes the template list.
    let greater_than_in_parens = [
        Word("array"),
        TemplateArgsStart,
        Word("i32"),
        Separator(','),
        Word("select"),
        Paren('('),
        Token::Number(Ok(Number::AbstractInt(2))),
        Separator(','),
        Token::Number(Ok(Number::AbstractInt(3))),
        Separator(','),
        Word("A"),
        Paren('>'),
        Word("B"),
        Paren(')'),
        TemplateArgsEnd,
    ];
    sub_test("array<i32,select(2,3,A>B)>", &greater_than_in_parens);

    // A `<` opened inside `[...]` is not matched by a `>` outside of it.
    let crossing_square_bracket = [
        Word("A"),
        Paren('['),
        Word("B"),
        Paren('<'),
        Word("C"),
        Paren(']'),
        Paren('>'),
        Word("D"),
    ];
    sub_test("A[B<C]>D", &crossing_square_bracket);

    // `<<` inside a template list is a shift operator.
    let shift_inside_list = [
        Word("A"),
        TemplateArgsStart,
        Word("B"),
        ShiftOperation('<'),
        Word("C"),
        TemplateArgsEnd,
    ];
    sub_test("A<B<<C>", &shift_inside_list);

    // Inside parentheses, `>=` remains a single comparison operator.
    let ge_inside_parens = [
        Word("A"),
        TemplateArgsStart,
        Paren('('),
        Word("B"),
        LogicalOperation('>'),
        Word("C"),
        Paren(')'),
        TemplateArgsEnd,
    ];
    sub_test("A<(B>=C)>", &ge_inside_parens);

    // Without parentheses, the `>` of `>=` closes the template list and the
    // `=` is reported on its own; the last `>` is left as a plain `Paren`.
    let ge_closes_list = [
        Word("A"),
        TemplateArgsStart,
        Word("B"),
        TemplateArgsEnd,
        Operation('='),
        Word("C"),
        Paren('>'),
    ];
    sub_test("A<B>=C>", &ge_closes_list);
}
1136
/// Ordinary (non-doc) comments, whether line or block style, are expected
/// to produce no tokens.
#[test]
fn test_comments() {
    let line_comment = "// Single comment";
    sub_test(line_comment, &[]);

    // A block comment spanning several lines.
    let block_comment = "/* multi
    line
    comment */";
    sub_test(block_comment, &[]);

    // A block comment followed by a line comment on the next line.
    let block_then_line = "/* multi
    line
    comment */
    // and another";
    sub_test(block_then_line, &[]);
}
1155
/// Doc comments (`///` and `/** ... */`) are expected to come back as
/// `DocComment` tokens whose payload is the full comment text, delimiters
/// included.
#[test]
fn test_doc_comments() {
    // The whole input is one line doc comment, so it doubles as the payload.
    let line_doc = "/// Single comment";
    sub_test_with_and_without_doc_comments(line_doc, &[Token::DocComment(line_doc)]);

    // Likewise for a block doc comment spanning several lines.
    let block_doc = "/** multi
    line
    comment */";
    sub_test_with_and_without_doc_comments(block_doc, &[Token::DocComment(block_doc)]);

    // A block doc comment followed by a line doc comment yields two tokens;
    // the whitespace between them belongs to neither.
    let block_then_line = "/** multi
    line
    comment */
    /// and another";
    let both_docs = [
        Token::DocComment(block_doc),
        Token::DocComment("/// and another"),
    ];
    sub_test_with_and_without_doc_comments(block_then_line, &both_docs);
}
1188
/// A block doc comment containing a nested `/** ... */` is expected to be
/// one `DocComment` token that ends at the outer `*/`; the declaration after
/// it is tokenized normally.
#[test]
fn test_doc_comment_nested() {
    let source = "/**
    a comment with nested one /**
        nested comment
    */
    */
    const a : i32 = 2;";

    // Payload of the doc comment: everything up to and including the outer
    // closing delimiter.
    let outer_doc = "/**
    a comment with nested one /**
        nested comment
    */
    */";

    let expected = [
        Token::DocComment(outer_doc),
        Token::Word("const"),
        Token::Word("a"),
        Token::Separator(':'),
        Token::Word("i32"),
        Token::Operation('='),
        Token::Number(Ok(Number::AbstractInt(2))),
        Token::Separator(';'),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}
1216
/// Line doc comments containing non-ASCII characters are expected to be
/// split at the right places: each `///` line becomes its own `DocComment`
/// and the declaration that follows is tokenized normally.
#[test]
fn test_doc_comment_long_character() {
    // The three comment lines deliberately start at different indentations.
    let source = "/// π/2
        ///     D(𝐡) = ───────────────────────────────────────────────────
///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`
    const a : i32 = 2;";

    let expected = [
        Token::DocComment("/// π/2"),
        Token::DocComment("///     D(𝐡) = ───────────────────────────────────────────────────"),
        Token::DocComment("///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`"),
        Token::Word("const"),
        Token::Word("a"),
        Token::Separator(':'),
        Token::Word("i32"),
        Token::Operation('='),
        Token::Number(Ok(Number::AbstractInt(2))),
        Token::Separator(';'),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}
1238
/// Module doc comments (`//!` and `/*! ... */`) are expected to come back as
/// `ModuleDocComment` tokens, interleaved with item doc comments, ordinary
/// comments, blank lines and other tokens.
#[test]
fn test_doc_comments_module() {
    let source = "//! Comment Module
        //! Another one.
        /*! Different module comment */
        /// Trying to break module comment
        // Trying to break module comment again
        //! After a regular comment is ok.
        /*! Different module comment again */

        //! After a break is supported.
        const
        //! After anything else is not.";

    let expected = [
        Token::ModuleDocComment("//! Comment Module"),
        Token::ModuleDocComment("//! Another one."),
        Token::ModuleDocComment("/*! Different module comment */"),
        Token::DocComment("/// Trying to break module comment"),
        // The ordinary `//` comment in the input contributes no token.
        Token::ModuleDocComment("//! After a regular comment is ok."),
        Token::ModuleDocComment("/*! Different module comment again */"),
        Token::ModuleDocComment("//! After a break is supported."),
        Token::Word("const"),
        // The lexer is still expected to emit this one as a token, even
        // though its text says the placement is unsupported.
        // NOTE(review): presumably it is rejected at a later stage — confirm.
        Token::ModuleDocComment("//! After anything else is not."),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}