naga/front/wgsl/parse/
lexer.rs

1use super::{number::consume_number, Error, ExpectedToken, Result};
2use crate::front::wgsl::error::NumberError;
3use crate::front::wgsl::parse::directive::enable_extension::{
4    EnableExtensions, ImplementedEnableExtension,
5};
6use crate::front::wgsl::parse::Number;
7use crate::Span;
8
9use alloc::{boxed::Box, vec::Vec};
10
/// A token paired with its [`Span`] in the original source.
pub type TokenSpan<'a> = (Token<'a>, Span);
12
/// A single lexical token of WGSL source text.
///
/// Tokens borrow string payloads from the source, so they are cheap to copy.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// A separator character: `:;,`, and `.` when not part of a numeric
    /// literal.
    Separator(char),

    /// A parenthesis-like character: `()[]{}`, and also `<>`.
    ///
    /// Note that `<>` representing template argument brackets are distinguished
    /// using WGSL's [template list discovery algorithm][tlda], and are returned
    /// as [`Token::TemplateArgsStart`] and [`Token::TemplateArgsEnd`]. That is,
    /// we use `Paren` for `<>` when they are *not* parens.
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    Paren(char),

    /// The attribute introduction character `@`.
    Attribute,

    /// A numeric literal, either integral or floating-point, including any
    /// type suffix.
    ///
    /// Carries the parse result, so a malformed number is still a single
    /// token whose error can be reported with the token's span.
    Number(core::result::Result<Number, NumberError>),

    /// An identifier, possibly a reserved word.
    Word(&'a str),

    /// A miscellaneous single-character operator, like an arithmetic unary or
    /// binary operator. This includes `=`, for assignment and initialization.
    Operation(char),

    /// Certain multi-character logical operators: `!=`, `==`, `&&`,
    /// `||`, `<=` and `>=`. The value gives the operator's first
    /// character.
    ///
    /// For `<` and `>` operators, see [`Token::Paren`].
    LogicalOperation(char),

    /// A shift operator: `>>` or `<<`.
    ShiftOperation(char),

    /// A compound assignment operator like `+=`.
    ///
    /// When the given character is `<` or `>`, those represent the left shift
    /// and right shift assignment operators, `<<=` and `>>=`.
    AssignmentOperation(char),

    /// The `++` operator.
    IncrementOperation,

    /// The `--` operator.
    DecrementOperation,

    /// The `->` token.
    Arrow,

    /// A `<` representing the start of a template argument list, according to
    /// WGSL's [template list discovery algorithm][tlda].
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    TemplateArgsStart,

    /// A `>` representing the end of a template argument list, according to
    /// WGSL's [template list discovery algorithm][tlda].
    ///
    /// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
    TemplateArgsEnd,

    /// A character that does not represent a legal WGSL token.
    Unknown(char),

    /// Comment or whitespace.
    Trivia,

    /// A doc comment, beginning with `///` or `/**`.
    DocComment(&'a str),

    /// A module-level doc comment, beginning with `//!` or `/*!`.
    ModuleDocComment(&'a str),

    /// A block comment that is incomplete, and has not been closed with */.
    ///
    /// It's expected that the parser will consider this to be an error.
    UnterminatedBlockComment(&'a str),

    /// The end of the input.
    End,
}
100
/// Split `input` at the end of its longest prefix of characters satisfying
/// `what`, returning `(matched_prefix, remainder)`.
fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) {
    // Walk the input until the first character that fails the predicate;
    // everything before it is the matched prefix.
    for (offset, ch) in input.char_indices() {
        if !what(ch) {
            return input.split_at(offset);
        }
    }
    // Every character matched: the remainder is empty.
    (input, "")
}
105
/// A `<` token that might turn out to begin a template argument list.
///
/// Used by `discover_template_lists` to track `<` tokens that have not yet
/// been matched with a closing `>`.
struct UnclosedCandidate {
    /// Index in the `tokens` buffer of the candidate `Token::Paren('<')`.
    index: usize,
    /// The `()`/`[]` bracket nesting depth at the time the `<` was seen.
    depth: usize,
}
110
/// Produce at least one token, distinguishing [template lists] from other uses
/// of `<` and `>`.
///
/// Consume one or more tokens from `input` and store them in `tokens`, updating
/// `input` to refer to the remaining text. Apply WGSL's [template list
/// discovery algorithm] to decide what sort of tokens `<` and `>` characters in
/// the input actually represent.
///
/// Store the tokens in `tokens` in the *reverse* of the order they appear in
/// the text, such that the caller can pop from the end of the vector to see the
/// tokens in textual order.
///
/// The `tokens` vector must be empty on entry. The idea is for the caller to
/// use it as a buffer of unconsumed tokens, and call this function to refill it
/// when it's empty.
///
/// The `source` argument must be the whole original source code, used to
/// compute spans.
///
/// If `ignore_doc_comments` is true, then doc comments are returned as
/// [`Token::Trivia`], like ordinary comments.
///
/// [template lists]: https://gpuweb.github.io/gpuweb/wgsl/#template-lists-sec
/// [template list discovery algorithm]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
fn discover_template_lists<'a>(
    tokens: &mut Vec<(TokenSpan<'a>, &'a str)>,
    source: &'a str,
    mut input: &'a str,
    ignore_doc_comments: bool,
) {
    assert!(tokens.is_empty());

    // True when the previous non-trivia token was an identifier, so a `<`
    // seen next could begin a template argument list.
    let mut looking_for_template_start = false;
    let mut pending: Vec<UnclosedCandidate> = Vec::new();

    // Current nesting depth of `()` and `[]` brackets. (`{}` brackets
    // exit all template list processing.)
    let mut depth = 0;

    // Discard every candidate opened at `depth` or deeper: those `<` tokens
    // can no longer be completed as template-list starts.
    fn pop_until(pending: &mut Vec<UnclosedCandidate>, depth: usize) {
        while pending
            .last()
            .map(|candidate| candidate.depth >= depth)
            .unwrap_or(false)
        {
            pending.pop();
        }
    }

    loop {
        // Decide whether `consume_token` should treat a `>` character as
        // `TemplateArgsEnd`, without considering the characters that follow.
        //
        // This condition matches the one that determines whether the spec's
        // template list discovery algorithm looks past a `>` character for a
        // `=`. By passing this flag to `consume_token`, we ensure it follows
        // that behavior.
        let waiting_for_template_end = pending
            .last()
            .is_some_and(|candidate| candidate.depth == depth);

        // Ask `consume_token` for the next token and add it to `tokens`, along
        // with its span.
        //
        // This means that `<` enters the buffer as `Token::Paren('<')`, the
        // ordinary comparison operator. We'll change that to
        // `Token::TemplateArgsStart` later if appropriate.
        let (token, rest) = consume_token(input, waiting_for_template_end, ignore_doc_comments);
        let span = Span::from(source.len() - input.len()..source.len() - rest.len());
        tokens.push(((token, span), rest));
        input = rest;

        // Since `consume_token` treats `<<=`, `<<` and `<=` as operators, not
        // `Token::Paren`, that takes care of the WGSL algorithm's post-'<' lookahead
        // for us.
        match token {
            Token::Word(_) => {
                // An identifier may be immediately followed by a template list.
                looking_for_template_start = true;
                continue;
            }
            Token::Trivia | Token::DocComment(_) | Token::ModuleDocComment(_)
                if looking_for_template_start =>
            {
                // Comments and whitespace may separate a name from its `<`.
                continue;
            }
            Token::Paren('<') if looking_for_template_start => {
                // Record this `<` as a candidate template-list start.
                pending.push(UnclosedCandidate {
                    index: tokens.len() - 1,
                    depth,
                });
            }
            Token::TemplateArgsEnd => {
                // The `consume_token` function only returns `TemplateArgsEnd`
                // if `waiting_for_template_end` is true, so we know `pending`
                // has a top entry at the appropriate depth.
                //
                // Find the matching `<` token and change its type to
                // `TemplateArgsStart`.
                let candidate = pending.pop().unwrap();
                let &mut ((ref mut token, _), _) = tokens.get_mut(candidate.index).unwrap();
                *token = Token::TemplateArgsStart;
            }
            Token::Paren('(' | '[') => {
                // Entering a nested bracket pair.
                depth += 1;
            }
            Token::Paren(')' | ']') => {
                // A closing bracket ends any candidates opened inside it.
                pop_until(&mut pending, depth);
                depth = depth.saturating_sub(1);
            }
            Token::Operation('=') | Token::Separator(':' | ';') | Token::Paren('{') => {
                // These tokens cannot occur inside a template list, so abandon
                // all candidates and reset the bracket depth.
                pending.clear();
                depth = 0;
            }
            Token::LogicalOperation('&') | Token::LogicalOperation('|') => {
                // `&&` and `||` end any candidates at the current depth.
                pop_until(&mut pending, depth);
            }
            Token::End => break,
            _ => {}
        }

        looking_for_template_start = false;

        // The WGSL spec's template list discovery algorithm processes the
        // entire source at once, but Naga would rather limit its lookahead to
        // the actual text that could possibly be a template parameter list.
        // This is usually less than a line.
        if pending.is_empty() {
            break;
        }
    }

    tokens.reverse();
}
244
/// Return the token at the start of `input`.
///
/// The `waiting_for_template_end` flag enables some special handling to help out
/// `discover_template_lists`:
///
/// - If `waiting_for_template_end` is `true`, then return text starting with
///   '>` as [`Token::TemplateArgsEnd`] and consume only the `>` character,
///   regardless of what characters follow it. This is required by the [template
///   list discovery algorithm][tlda] when the `>` would end a template argument list.
///
/// - If `waiting_for_template_end` is false, recognize multi-character tokens
///   beginning with `>` as usual.
///
/// If `ignore_doc_comments` is true, then doc comments are returned as
/// [`Token::Trivia`], like ordinary comments.
///
/// [tlda]: https://gpuweb.github.io/gpuweb/wgsl/#template-list-discovery
fn consume_token(
    input: &str,
    waiting_for_template_end: bool,
    ignore_doc_comments: bool,
) -> (Token<'_>, &str) {
    let mut chars = input.chars();
    let cur = match chars.next() {
        Some(c) => c,
        None => return (Token::End, ""),
    };
    match cur {
        ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()),
        '.' => {
            // A `.` followed by a digit starts a floating-point literal;
            // otherwise it is a lone separator.
            let og_chars = chars.as_str();
            match chars.next() {
                Some('0'..='9') => consume_number(input),
                _ => (Token::Separator(cur), og_chars),
            }
        }
        '@' => (Token::Attribute, chars.as_str()),
        '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()),
        '<' | '>' => {
            let og_chars = chars.as_str();
            if cur == '>' && waiting_for_template_end {
                // Inside a template list, a `>` always closes the list; do not
                // look ahead for `>=`, `>>`, or `>>=`.
                return (Token::TemplateArgsEnd, og_chars);
            }
            match chars.next() {
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                Some(c) if c == cur => {
                    // `<<` or `>>`: check for the shift-assignment forms
                    // `<<=` / `>>=`.
                    let og_chars = chars.as_str();
                    match chars.next() {
                        Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                        _ => (Token::ShiftOperation(cur), og_chars),
                    }
                }
                _ => (Token::Paren(cur), og_chars),
            }
        }
        '0'..='9' => consume_number(input),
        '/' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('/') => {
                    // A line comment extends to the first line break, or to
                    // the end of the input.
                    let mut input_chars = input.char_indices();
                    let doc_comment_end = input_chars
                        .find_map(|(index, c)| is_comment_end(c).then_some(index))
                        .unwrap_or(input.len());
                    // The third character distinguishes `///` doc comments and
                    // `//!` module doc comments from ordinary `//` trivia.
                    let token = match chars.next() {
                        Some('/') if !ignore_doc_comments => {
                            Token::DocComment(&input[..doc_comment_end])
                        }
                        Some('!') if !ignore_doc_comments => {
                            Token::ModuleDocComment(&input[..doc_comment_end])
                        }
                        _ => Token::Trivia,
                    };
                    (token, input_chars.as_str())
                }
                Some('*') => {
                    let next_c = chars.next();

                    enum CommentType {
                        Doc,
                        ModuleDoc,
                        Normal,
                    }
                    // The character after `/*` decides whether this is a `/**`
                    // doc comment, a `/*!` module doc comment, or plain trivia.
                    let comment_type = match next_c {
                        Some('*') if !ignore_doc_comments => CommentType::Doc,
                        Some('!') if !ignore_doc_comments => CommentType::ModuleDoc,
                        _ => CommentType::Normal,
                    };

                    // Block comments nest, so track the nesting depth.
                    let mut depth = 1;
                    let mut prev = next_c;

                    for c in &mut chars {
                        match (prev, c) {
                            (Some('*'), '/') => {
                                // Clear `prev` so the `/` just consumed cannot
                                // also begin a `/*` (e.g. in `*/*`).
                                prev = None;
                                depth -= 1;
                                if depth == 0 {
                                    let rest = chars.as_str();
                                    let token = match comment_type {
                                        CommentType::Doc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::DocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::ModuleDoc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::ModuleDocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::Normal => Token::Trivia,
                                    };
                                    return (token, rest);
                                }
                            }
                            (Some('/'), '*') => {
                                // A nested `/*`; same `prev` reset as above.
                                prev = None;
                                depth += 1;
                            }
                            _ => {
                                prev = Some(c);
                            }
                        }
                    }

                    // Ran out of input with `depth > 0`: the comment was never
                    // closed. Hand the whole remainder to the parser as an error.
                    (Token::UnterminatedBlockComment(input), "")
                }
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '-' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('>') => (Token::Arrow, chars.as_str()),
                Some('-') => (Token::DecrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '+' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('+') => (Token::IncrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '*' | '%' | '^' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '~' => (Token::Operation(cur), chars.as_str()),
        '=' | '!' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '&' | '|' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        _ if is_blankspace(cur) => {
            // Collapse a whole run of blankspace into one trivia token.
            let (_, rest) = consume_any(input, is_blankspace);
            (Token::Trivia, rest)
        }
        _ if is_word_start(cur) => {
            let (word, rest) = consume_any(input, is_word_part);
            (Token::Word(word), rest)
        }
        _ => (Token::Unknown(cur), chars.as_str()),
    }
}
425
/// Returns whether or not a char is a comment end
/// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F)
/// <https://www.w3.org/TR/WGSL/#line-break>
const fn is_comment_end(c: char) -> bool {
    matches!(
        c,
        '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
435
/// Returns whether or not a char is a blankspace (Unicode Pattern_White_Space)
const fn is_blankspace(c: char) -> bool {
    matches!(
        c,
        '\u{0020}'
            | '\u{0009}'..='\u{000d}'
            | '\u{0085}'
            | '\u{200e}'
            | '\u{200f}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
449
450/// Returns whether or not a char is a word start (Unicode XID_Start + '_')
451fn is_word_start(c: char) -> bool {
452    c == '_' || unicode_ident::is_xid_start(c)
453}
454
/// Returns whether or not a char is a word part (Unicode XID_Continue)
///
/// Unlike [`is_word_start`], no explicit `_` case is needed here, since
/// XID_Continue already includes `_`.
fn is_word_part(c: char) -> bool {
    unicode_ident::is_xid_continue(c)
}
459
/// A WGSL lexer, yielding [`Token`]s with their [`Span`]s.
///
/// Tokens are produced on demand from `input`, buffered through `tokens`
/// so that template-list discovery can look ahead.
pub(in crate::front::wgsl) struct Lexer<'a> {
    /// The remaining unconsumed input.
    input: &'a str,

    /// The full original source code.
    ///
    /// We compare `input` against this to compute the lexer's current offset in
    /// the source.
    pub(in crate::front::wgsl) source: &'a str,

    /// The byte offset of the end of the most recently returned non-trivia
    /// token.
    ///
    /// This is consulted by the `span_from` function, for finding the
    /// end of the span for larger structures like expressions or
    /// statements.
    last_end_offset: usize,

    /// A stack of unconsumed tokens to which template list discovery has been
    /// applied.
    ///
    /// This is a stack: the next token is at the *end* of the vector, not the
    /// start. So tokens appear here in the reverse of the order they appear in
    /// the source.
    ///
    /// This doesn't contain the whole source, only those tokens produced by
    /// [`discover_template_lists`]'s look-ahead, or that have been produced by
    /// other look-ahead functions like `peek` and `next_if`. When this is empty,
    /// we call [`discover_template_lists`] to get more.
    tokens: Vec<(TokenSpan<'a>, &'a str)>,

    /// Whether or not to ignore doc comments.
    /// If `true`, doc comments are treated as [`Token::Trivia`].
    ignore_doc_comments: bool,

    /// The set of [enable-extensions] present in the module, determined in a pre-pass.
    ///
    /// [enable-extensions]: https://gpuweb.github.io/gpuweb/wgsl/#enable-extensions-sec
    pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
}
500
impl<'a> Lexer<'a> {
    /// Construct a lexer over `input`.
    ///
    /// If `ignore_doc_comments` is true, doc comments are returned as
    /// [`Token::Trivia`], like ordinary comments.
    pub(in crate::front::wgsl) const fn new(input: &'a str, ignore_doc_comments: bool) -> Self {
        Lexer {
            input,
            source: input,
            last_end_offset: 0,
            tokens: Vec::new(),
            enable_extensions: EnableExtensions::empty(),
            ignore_doc_comments,
        }
    }

    /// Check that `extension` is enabled in `self`.
    pub(in crate::front::wgsl) fn require_enable_extension(
        &self,
        extension: ImplementedEnableExtension,
        span: Span,
    ) -> Result<'static, ()> {
        self.enable_extensions.require(extension, span)
    }

    /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed
    ///
    /// # Examples
    /// ```ignore
    /// let lexer = Lexer::new("5");
    /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal);
    /// assert_eq!(value, 5);
    /// ```
    #[inline]
    pub fn capture_span<T, E>(
        &mut self,
        inner: impl FnOnce(&mut Self) -> core::result::Result<T, E>,
    ) -> core::result::Result<(T, Span), E> {
        let start = self.current_byte_offset();
        let res = inner(self)?;
        let end = self.current_byte_offset();
        Ok((res, Span::from(start..end)))
    }

    /// Skip leading trivia (including doc comments, since `consume_token` is
    /// called with `ignore_doc_comments = true`) and return the byte offset
    /// of the next real token.
    pub(in crate::front::wgsl) fn start_byte_offset(&mut self) -> usize {
        loop {
            // Eat all trivia because `next` doesn't eat trailing trivia.
            let (token, rest) = consume_token(self.input, false, true);
            if let Token::Trivia = token {
                self.input = rest;
            } else {
                return self.current_byte_offset();
            }
        }
    }

    /// Collect all module doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_module_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::ModuleDocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                // Leave the non-doc token unconsumed for the next caller.
                return doc_comments;
            }
        }
    }

    /// Collect all doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::DocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                // Leave the non-doc token unconsumed for the next caller.
                return doc_comments;
            }
        }
    }

    /// The lexer's current byte offset within the original source.
    const fn current_byte_offset(&self) -> usize {
        self.source.len() - self.input.len()
    }

    /// Return the span from `offset` to the end of the most recently
    /// returned non-trivia token.
    pub(in crate::front::wgsl) fn span_from(&self, offset: usize) -> Span {
        Span::from(offset..self.last_end_offset)
    }
    /// Return a span beginning where `span` begins and running to the end of
    /// the most recently returned non-trivia token.
    // NOTE(review): assumes `Span::until` keeps the receiver's start and the
    // argument's end — confirm against `Span`'s definition.
    pub(in crate::front::wgsl) fn span_with_start(&self, span: Span) -> Span {
        span.until(&Span::from(0..self.last_end_offset))
    }

    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are a parse state where bit shift operators may
    /// occur, but not angle brackets.
    #[must_use]
    pub(in crate::front::wgsl) fn next(&mut self) -> TokenSpan<'a> {
        self.next_impl(true)
    }

    /// Like [`Self::next`], but surfaces doc comments instead of treating
    /// them as trivia. Used by the lexer tests.
    #[cfg(test)]
    pub fn next_with_unignored_doc_comments(&mut self) -> TokenSpan<'a> {
        self.next_impl(false)
    }

    /// Return the next non-whitespace token from `self`, with a span.
    fn next_impl(&mut self, ignore_doc_comments: bool) -> TokenSpan<'a> {
        loop {
            // Refill the look-ahead buffer if it's exhausted.
            if self.tokens.is_empty() {
                discover_template_lists(
                    &mut self.tokens,
                    self.source,
                    self.input,
                    ignore_doc_comments || self.ignore_doc_comments,
                );
            }
            assert!(!self.tokens.is_empty());
            let (token, rest) = self.tokens.pop().unwrap();

            self.input = rest;
            self.last_end_offset = self.current_byte_offset();

            // Skip trivia; keep looping until a real token appears.
            match token.0 {
                Token::Trivia => {}
                _ => return token,
            }
        }
    }

    /// Return the next non-trivia token without consuming it.
    ///
    /// The token is pushed back onto the token stack (along with the input
    /// position it leaves behind), and the lexer's state is restored.
    #[must_use]
    pub(in crate::front::wgsl) fn peek(&mut self) -> TokenSpan<'a> {
        let input = self.input;
        let last_end_offset = self.last_end_offset;
        let token = self.next();
        self.tokens.push((token, self.input));
        self.input = input;
        self.last_end_offset = last_end_offset;
        token
    }

    /// If the next token matches it's consumed and true is returned
    pub(in crate::front::wgsl) fn next_if(&mut self, what: Token<'_>) -> bool {
        let input = self.input;
        let last_end_offset = self.last_end_offset;
        let token = self.next();
        if token.0 == what {
            true
        } else {
            // Not a match: push the token back and restore the lexer state.
            self.tokens.push((token, self.input));
            self.input = input;
            self.last_end_offset = last_end_offset;
            false
        }
    }

    /// Consume the next token, requiring it to equal `expected`; return its span.
    pub(in crate::front::wgsl) fn expect_span(&mut self, expected: Token<'a>) -> Result<'a, Span> {
        let next = self.next();
        if next.0 == expected {
            Ok(next.1)
        } else {
            Err(Box::new(Error::Unexpected(
                next.1,
                ExpectedToken::Token(expected),
            )))
        }
    }

    /// Consume the next token, requiring it to equal `expected`.
    pub(in crate::front::wgsl) fn expect(&mut self, expected: Token<'a>) -> Result<'a, ()> {
        self.expect_span(expected)?;
        Ok(())
    }

    /// Consume the next token, requiring it to be an identifier.
    ///
    /// Rejects the lone `_` and identifiers with the reserved `__` prefix.
    pub(in crate::front::wgsl) fn next_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
        match self.next() {
            (Token::Word("_"), span) => Err(Box::new(Error::InvalidIdentifierUnderscore(span))),
            (Token::Word(word), span) => {
                if word.starts_with("__") {
                    Err(Box::new(Error::ReservedIdentifierPrefix(span)))
                } else {
                    Ok((word, span))
                }
            }
            (_, span) => Err(Box::new(Error::Unexpected(span, ExpectedToken::Identifier))),
        }
    }

    /// Consume the next token as an identifier, additionally rejecting
    /// reserved words.
    pub(in crate::front::wgsl) fn next_ident(&mut self) -> Result<'a, super::ast::Ident<'a>> {
        self.next_ident_with_span()
            .and_then(|(word, span)| Self::word_as_ident(word, span))
            .map(|(name, span)| super::ast::Ident { name, span })
    }

    /// Reject `word` if it is a WGSL reserved word.
    fn word_as_ident(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
        if crate::keywords::wgsl::RESERVED.contains(&word) {
            Err(Box::new(Error::ReservedKeyword(span)))
        } else {
            Ok((word, span))
        }
    }

    /// Expect the opening `(` of an argument list.
    pub(in crate::front::wgsl) fn open_arguments(&mut self) -> Result<'a, ()> {
        self.expect(Token::Paren('('))
    }

    /// After an argument, consume either a `,` or the closing `)`.
    ///
    /// Returns `Ok(true)` if another argument may follow, `Ok(false)` if the
    /// list ended (including a trailing comma before `)`).
    pub(in crate::front::wgsl) fn next_argument(&mut self) -> Result<'a, bool> {
        let paren = Token::Paren(')');
        if self.next_if(Token::Separator(',')) {
            Ok(!self.next_if(paren))
        } else {
            self.expect(paren).map(|()| false)
        }
    }
}
720
/// Assert that lexing `source` (with doc comments ignored) produces exactly
/// `expected_tokens`, followed by [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test(source: &str, expected_tokens: &[Token]) {
    sub_test_with(true, source, expected_tokens);
}
726
/// Check `source` against `expected_tokens` twice: once with doc comments
/// surfaced, and once with them ignored (filtering doc-comment tokens out of
/// the expectation for the second pass).
#[cfg(test)]
#[track_caller]
fn sub_test_with_and_without_doc_comments(source: &str, expected_tokens: &[Token]) {
    sub_test_with(false, source, expected_tokens);
    sub_test_with(
        true,
        source,
        expected_tokens
            .iter()
            .filter(|v| !matches!(**v, Token::DocComment(_) | Token::ModuleDocComment(_)))
            .cloned()
            .collect::<Vec<_>>()
            .as_slice(),
    );
}
742
/// Assert that lexing `source` with the given `ignore_doc_comments` setting
/// produces exactly `expected_tokens`, followed by [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test_with(ignore_doc_comments: bool, source: &str, expected_tokens: &[Token]) {
    let mut lex = Lexer::new(source, ignore_doc_comments);
    for &token in expected_tokens {
        assert_eq!(lex.next_with_unignored_doc_comments().0, token);
    }
    assert_eq!(lex.next().0, Token::End);
}
752
753#[test]
754fn test_numbers() {
755    use half::f16;
756    // WGSL spec examples //
757
758    // decimal integer
759    sub_test(
760        "0x123 0X123u 1u 123 0 0i 0x3f",
761        &[
762            Token::Number(Ok(Number::AbstractInt(291))),
763            Token::Number(Ok(Number::U32(291))),
764            Token::Number(Ok(Number::U32(1))),
765            Token::Number(Ok(Number::AbstractInt(123))),
766            Token::Number(Ok(Number::AbstractInt(0))),
767            Token::Number(Ok(Number::I32(0))),
768            Token::Number(Ok(Number::AbstractInt(63))),
769        ],
770    );
771    // decimal floating point
772    sub_test(
773        "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h",
774        &[
775            Token::Number(Ok(Number::F32(0.))),
776            Token::Number(Ok(Number::AbstractFloat(1.))),
777            Token::Number(Ok(Number::AbstractFloat(0.01))),
778            Token::Number(Ok(Number::AbstractFloat(12.34))),
779            Token::Number(Ok(Number::F32(0.))),
780            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
781            Token::Number(Ok(Number::AbstractFloat(0.001))),
782            Token::Number(Ok(Number::AbstractFloat(43.75))),
783            Token::Number(Ok(Number::F32(16.))),
784            Token::Number(Ok(Number::AbstractFloat(0.1875))),
785            // https://github.com/gfx-rs/wgpu/issues/7046
786            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
787            Token::Number(Ok(Number::AbstractFloat(0.12109375))),
788            // https://github.com/gfx-rs/wgpu/issues/7046
789            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
790        ],
791    );
792
793    // MIN / MAX //
794
795    // min / max decimal integer
796    sub_test(
797        "0i 2147483647i 2147483648i",
798        &[
799            Token::Number(Ok(Number::I32(0))),
800            Token::Number(Ok(Number::I32(i32::MAX))),
801            Token::Number(Err(NumberError::NotRepresentable)),
802        ],
803    );
804    // min / max decimal unsigned integer
805    sub_test(
806        "0u 4294967295u 4294967296u",
807        &[
808            Token::Number(Ok(Number::U32(u32::MIN))),
809            Token::Number(Ok(Number::U32(u32::MAX))),
810            Token::Number(Err(NumberError::NotRepresentable)),
811        ],
812    );
813
814    // min / max hexadecimal signed integer
815    sub_test(
816        "0x0i 0x7FFFFFFFi 0x80000000i",
817        &[
818            Token::Number(Ok(Number::I32(0))),
819            Token::Number(Ok(Number::I32(i32::MAX))),
820            Token::Number(Err(NumberError::NotRepresentable)),
821        ],
822    );
823    // min / max hexadecimal unsigned integer
824    sub_test(
825        "0x0u 0xFFFFFFFFu 0x100000000u",
826        &[
827            Token::Number(Ok(Number::U32(u32::MIN))),
828            Token::Number(Ok(Number::U32(u32::MAX))),
829            Token::Number(Err(NumberError::NotRepresentable)),
830        ],
831    );
832
833    // min/max decimal abstract int
834    sub_test(
835        "0 9223372036854775807 9223372036854775808",
836        &[
837            Token::Number(Ok(Number::AbstractInt(0))),
838            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
839            Token::Number(Err(NumberError::NotRepresentable)),
840        ],
841    );
842
843    // min/max hexadecimal abstract int
844    sub_test(
845        "0 0x7fffffffffffffff 0x8000000000000000",
846        &[
847            Token::Number(Ok(Number::AbstractInt(0))),
848            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
849            Token::Number(Err(NumberError::NotRepresentable)),
850        ],
851    );
852
853    /// ≈ 2^-126 * 2^−23 (= 2^−149)
854    const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45;
855    /// ≈ 2^-126 * (1 − 2^−23)
856    const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38;
857    /// ≈ 2^-126
858    const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE;
859    /// ≈ 1 − 2^−24
860    const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994;
861    /// ≈ 1 + 2^−23
862    const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001;
863    /// ≈ 2^127 * (2 − 2^−23)
864    const LARGEST_NORMAL_F32: f32 = f32::MAX;
865
866    // decimal floating point
867    sub_test(
868        "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f 3.40282347e+38f",
869        &[
870            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
871            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
872            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
873            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
874            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
875            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
876        ],
877    );
878    sub_test(
879        "3.40282367e+38f",
880        &[
881            Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128
882        ],
883    );
884
885    // hexadecimal floating point
886    sub_test(
887        "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f 0xFFFFFFp+104f",
888        &[
889            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
890            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
891            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
892            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
893            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
894            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
895        ],
896    );
897    sub_test(
898        "0x1p128f 0x1.000001p0f",
899        &[
900            Token::Number(Err(NumberError::NotRepresentable)), // = 2^128
901            Token::Number(Err(NumberError::NotRepresentable)),
902        ],
903    );
904}
905
/// Lexing of `f64` literals: hex floats and decimal floats with the `lf`
/// suffix. A bare `l` is not a valid suffix, so `10l` lexes as the
/// abstract int `10` followed by the identifier `l`.
#[test]
fn double_floats() {
    sub_test(
        "0x1.2p4lf 0x1p8lf 0.0625lf 625e-4lf 10lf 10l",
        &[
            // 0x1.2 * 2^4 = 18.0
            Token::Number(Ok(Number::F64(18.0))),
            // 0x1 * 2^8 = 256.0
            Token::Number(Ok(Number::F64(256.0))),
            Token::Number(Ok(Number::F64(0.0625))),
            // 625e-4 = 0.0625, same value via decimal exponent.
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(10.0))),
            // `l` alone is not a suffix: `10l` splits into two tokens.
            Token::Number(Ok(Number::AbstractInt(10))),
            Token::Word("l"),
        ],
    )
}
921
/// Token boundary handling: identifiers (including non-ASCII XID
/// characters), number/word splits, comment-vs-operator disambiguation,
/// and hex-float suffix rules.
#[test]
fn test_tokens() {
    sub_test("id123_OK", &[Token::Word("id123_OK")]);
    // A letter ends a decimal literal; the rest lexes as a word.
    sub_test(
        "92No",
        &[
            Token::Number(Ok(Number::AbstractInt(92))),
            Token::Word("No"),
        ],
    );
    // The `u` suffix terminates the first literal, then `3o` splits
    // into a number and a word the same way.
    sub_test(
        "2u3o",
        &[
            Token::Number(Ok(Number::U32(2))),
            Token::Number(Ok(Number::AbstractInt(3))),
            Token::Word("o"),
        ],
    );
    // The `f` suffix ends the float; `44` starts a fresh literal.
    sub_test(
        "2.4f44po",
        &[
            Token::Number(Ok(Number::F32(2.4))),
            Token::Number(Ok(Number::AbstractInt(44))),
            Token::Word("po"),
        ],
    );
    // Identifiers may contain non-ASCII characters from many scripts.
    sub_test(
        "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ",
        &[
            Token::Word("Δέλτα"),
            Token::Word("réflexion"),
            Token::Word("Кызыл"),
            Token::Word("𐰓𐰏𐰇"),
            Token::Word("朝焼け"),
            Token::Word("سلام"),
            Token::Word("검정"),
            Token::Word("שָׁלוֹם"),
            Token::Word("गुलाबी"),
            Token::Word("փիրուզ"),
        ],
    );
    sub_test("æNoø", &[Token::Word("æNoø")]);
    // `¾` is not a valid identifier character, so it becomes Unknown.
    sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]);
    sub_test("No好", &[Token::Word("No好")]);
    sub_test("_No", &[Token::Word("_No")]);

    // A pathological mix of `*` and `/`: a leading `*`, then `/*/***/` is
    // a block comment (no token), `*/` follows it as operator + compound
    // assignment start, `/*****/` is a doc-style block comment, and the
    // final `/` stands alone.
    sub_test_with_and_without_doc_comments(
        "*/*/***/*//=/*****//",
        &[
            Token::Operation('*'),
            Token::AssignmentOperation('/'),
            Token::DocComment("/*****/"),
            Token::Operation('/'),
        ],
    );

    // Type suffixes are only allowed on hex float literals
    // if you provided an exponent.
    sub_test(
        "0x1.2f 0x1.2f 0x1.2h 0x1.2H 0x1.2lf",
        &[
            // The 'f' suffixes are taken as a hex digit:
            // the fractional part is 0x2f / 256.
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            // `h`, `H`, and `lf` are not hex digits, so they detach from
            // the literal and lex as separate words.
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("h"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("H"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("lf"),
        ],
    )
}
996
/// Lexing of complete `var` declarations: attributes, separators, and
/// template lists (`< uniform>`, `<f32 >`, `<storage,read_write>`,
/// `<u32>`) with assorted whitespace around the angle brackets.
#[test]
fn test_variable_decl() {
    sub_test(
        "@group(0 ) var< uniform> texture:   texture_multisampled_2d <f32 >;",
        &[
            Token::Attribute,
            Token::Word("group"),
            Token::Paren('('),
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Paren(')'),
            Token::Word("var"),
            // `< uniform>` is still discovered as a template list despite
            // the interior whitespace.
            Token::TemplateArgsStart,
            Token::Word("uniform"),
            Token::TemplateArgsEnd,
            Token::Word("texture"),
            Token::Separator(':'),
            Token::Word("texture_multisampled_2d"),
            Token::TemplateArgsStart,
            Token::Word("f32"),
            Token::TemplateArgsEnd,
            Token::Separator(';'),
        ],
    );
    sub_test(
        "var<storage,read_write> buffer: array<u32>;",
        &[
            Token::Word("var"),
            Token::TemplateArgsStart,
            Token::Word("storage"),
            // `,` separates the two template arguments.
            Token::Separator(','),
            Token::Word("read_write"),
            Token::TemplateArgsEnd,
            Token::Word("buffer"),
            Token::Separator(':'),
            Token::Word("array"),
            Token::TemplateArgsStart,
            Token::Word("u32"),
            Token::TemplateArgsEnd,
            Token::Separator(';'),
        ],
    );
}
1039
/// Template-list discovery corner cases: `<`/`>` become
/// [`Token::TemplateArgsStart`]/[`Token::TemplateArgsEnd`] only when the
/// discovery algorithm finds a matching pair; otherwise they lex as
/// [`Token::Paren`] (which also represents the comparison operators).
#[test]
fn test_template_list() {
    // `||` cannot occur inside a template list, so both angle brackets
    // lex as plain parens here.
    sub_test(
        "A<B||C>D",
        &[
            Token::Word("A"),
            Token::Paren('<'),
            Token::Word("B"),
            Token::LogicalOperation('|'),
            Token::Word("C"),
            Token::Paren('>'),
            Token::Word("D"),
        ],
    );
    // A template list nests inside parenthesized call arguments.
    sub_test(
        "A(B<C,D>(E))",
        &[
            Token::Word("A"),
            Token::Paren('('),
            Token::Word("B"),
            Token::TemplateArgsStart,
            Token::Word("C"),
            Token::Separator(','),
            Token::Word("D"),
            Token::TemplateArgsEnd,
            Token::Paren('('),
            Token::Word("E"),
            Token::Paren(')'),
            Token::Paren(')'),
        ],
    );
    // The `>` in `A>B` sits inside `select(...)`'s parens, so it cannot
    // close the outer template list: it lexes as the greater-than
    // operator (represented as `Paren('>')`).
    sub_test(
        "array<i32,select(2,3,A>B)>",
        &[
            Token::Word("array"),
            Token::TemplateArgsStart,
            Token::Word("i32"),
            Token::Separator(','),
            Token::Word("select"),
            Token::Paren('('),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(','),
            Token::Number(Ok(Number::AbstractInt(3))),
            Token::Separator(','),
            Token::Word("A"),
            Token::Paren('>'),
            Token::Word("B"),
            Token::Paren(')'),
            Token::TemplateArgsEnd,
        ],
    );
    // The enclosing `[` closes before the `<` could be matched, so both
    // angle brackets are plain parens.
    sub_test(
        "A[B<C]>D",
        &[
            Token::Word("A"),
            Token::Paren('['),
            Token::Word("B"),
            Token::Paren('<'),
            Token::Word("C"),
            Token::Paren(']'),
            Token::Paren('>'),
            Token::Word("D"),
        ],
    );
    // `<<` inside a template list lexes as a shift operator, not as two
    // nested template starts.
    sub_test(
        "A<B<<C>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Word("B"),
            Token::ShiftOperation('<'),
            Token::Word("C"),
            Token::TemplateArgsEnd,
        ],
    );
    // `>=` inside nested parens is the comparison operator; it does not
    // end the template list.
    sub_test(
        "A<(B>=C)>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Paren('('),
            Token::Word("B"),
            Token::LogicalOperation('>'),
            Token::Word("C"),
            Token::Paren(')'),
            Token::TemplateArgsEnd,
        ],
    );
    // Directly inside the template list, the first `>` of `>=` closes the
    // list; the `=` then stands alone, and the trailing `>` has no opener.
    sub_test(
        "A<B>=C>",
        &[
            Token::Word("A"),
            Token::TemplateArgsStart,
            Token::Word("B"),
            Token::TemplateArgsEnd,
            Token::Operation('='),
            Token::Word("C"),
            Token::Paren('>'),
        ],
    );
}
1141
/// Plain `//` line comments and `/* */` block comments (including a
/// multi-line block followed by a line comment) produce no tokens.
#[test]
fn test_comments() {
    let cases = [
        "// Single comment",
        "/* multi\n    line\n    comment */",
        "/* multi\n    line\n    comment */\n    // and another",
    ];
    for source in cases {
        sub_test(source, &[]);
    }
}
1160
/// `///` and `/** */` comments lex as [`Token::DocComment`], carrying the
/// full comment text including the delimiters.
#[test]
fn test_doc_comments() {
    sub_test_with_and_without_doc_comments(
        "/// Single comment",
        &[Token::DocComment("/// Single comment")],
    );

    // A multi-line `/** */` comment becomes a single token spanning all
    // of its lines.
    sub_test_with_and_without_doc_comments(
        "/** multi
    line
    comment */",
        &[Token::DocComment(
            "/** multi
    line
    comment */",
        )],
    );
    // Consecutive doc comments each get their own token.
    sub_test_with_and_without_doc_comments(
        "/** multi
    line
    comment */
    /// and another",
        &[
            Token::DocComment(
                "/** multi
    line
    comment */",
            ),
            Token::DocComment("/// and another"),
        ],
    );
}
1193
/// Block doc comments nest: the outer `/**` comment extends through the
/// inner `/** ... */` pair to its own closing `*/`, and the whole span
/// becomes one [`Token::DocComment`].
#[test]
fn test_doc_comment_nested() {
    sub_test_with_and_without_doc_comments(
        "/**
    a comment with nested one /**
        nested comment
    */
    */
    const a : i32 = 2;",
        &[
            Token::DocComment(
                "/**
    a comment with nested one /**
        nested comment
    */
    */",
            ),
            // Lexing resumes normally after the nested comment closes.
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1221
/// Doc comments containing multi-byte characters (Greek, box-drawing,
/// mathematical alphanumerics, sub-/superscripts) lex into intact
/// [`Token::DocComment`] tokens, and lexing continues correctly after
/// them.
#[test]
fn test_doc_comment_long_character() {
    sub_test_with_and_without_doc_comments(
        "/// π/2
        ///     D(𝐡) = ───────────────────────────────────────────────────
///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`
    const a : i32 = 2;",
        &[
            Token::DocComment("/// π/2"),
            Token::DocComment("///     D(𝐡) = ───────────────────────────────────────────────────"),
            Token::DocComment("///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`"),
            // The declaration after the comments still lexes normally.
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1243
/// `//!` and `/*! */` comments lex as [`Token::ModuleDocComment`].
/// Ordinary `//` comments produce no token at all (note the "break module
/// comment again" line is absent from the expected stream). The lexer
/// emits a `ModuleDocComment` token wherever one appears — even after
/// `const` — so the placement rules hinted at by the input text are
/// presumably enforced later, by the parser (TODO confirm).
#[test]
fn test_doc_comments_module() {
    sub_test_with_and_without_doc_comments(
        "//! Comment Module
        //! Another one.
        /*! Different module comment */
        /// Trying to break module comment
        // Trying to break module comment again
        //! After a regular comment is ok.
        /*! Different module comment again */

        //! After a break is supported.
        const
        //! After anything else is not.",
        &[
            Token::ModuleDocComment("//! Comment Module"),
            Token::ModuleDocComment("//! Another one."),
            Token::ModuleDocComment("/*! Different module comment */"),
            // `///` stays a plain doc comment even between module ones.
            Token::DocComment("/// Trying to break module comment"),
            Token::ModuleDocComment("//! After a regular comment is ok."),
            Token::ModuleDocComment("/*! Different module comment again */"),
            Token::ModuleDocComment("//! After a break is supported."),
            Token::Word("const"),
            Token::ModuleDocComment("//! After anything else is not."),
        ],
    );
}
1271
/// An unterminated `/**` block comment lexes as
/// [`Token::UnterminatedBlockComment`] carrying the raw comment text.
#[test]
fn test_block_comment_unclosed() {
    let source = "/** Unclosed Doc Comment";
    let expected = [Token::UnterminatedBlockComment(source)];
    sub_test_with_and_without_doc_comments(source, &expected);
}