naga/front/wgsl/parse/
lexer.rs

1use super::{number::consume_number, Error, ExpectedToken, Result};
2use crate::front::wgsl::error::NumberError;
3use crate::front::wgsl::parse::directive::enable_extension::EnableExtensions;
4use crate::front::wgsl::parse::{conv, Number};
5use crate::front::wgsl::{ImplementedEnableExtension, Scalar};
6use crate::Span;
7
8use alloc::{boxed::Box, vec::Vec};
9
/// A token paired with the span of source text it was lexed from.
type TokenSpan<'a> = (Token<'a>, Span);
11
/// A lexical token produced by [`consume_token`].
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// `:`, `;`, `,`, or a `.` that is not followed by a decimal digit.
    Separator(char),
    /// One of `(`, `)`, `{`, `}`, `[`, `]`, or a `<` / `>` that was not lexed
    /// as part of an operator (always the case in generic mode).
    Paren(char),
    /// The `@` character.
    Attribute,
    /// A numeric literal, or the error produced while lexing one.
    Number(core::result::Result<Number, NumberError>),
    /// A run of word characters beginning with a word-start character.
    Word(&'a str),
    /// A single-character operator such as `+`, `-`, `*`, `/`, `%`, `^`, `~`,
    /// `=`, `!`, `&` or `|`.
    Operation(char),
    /// `<=`, `>=`, `==`, `!=`, `&&` or `||`; the payload is the first character.
    LogicalOperation(char),
    /// `<<` or `>>`; the payload is `<` or `>`.
    ShiftOperation(char),
    /// A compound assignment such as `+=`, `/=`, `&=`, `<<=` or `>>=`; the
    /// payload is the first character.
    AssignmentOperation(char),
    /// `++`
    IncrementOperation,
    /// `--`
    DecrementOperation,
    /// `->`
    Arrow,
    /// Any character that starts no other token.
    Unknown(char),
    /// Blankspace, an ordinary comment, or a doc comment when doc comments
    /// are being ignored.
    Trivia,
    /// A `///` or `/** ... */` comment, delimiters included.
    DocComment(&'a str),
    /// A `//!` or `/*! ... */` comment, delimiters included.
    ModuleDocComment(&'a str),
    /// End of input. Also produced for a block comment that is never closed.
    End,
}
32
/// Splits `input` into its longest prefix whose characters all satisfy
/// `what`, and the remainder.
fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) {
    let boundary = input
        .char_indices()
        .find(|&(_, ch)| !what(ch))
        .map_or(input.len(), |(index, _)| index);
    input.split_at(boundary)
}
37
/// Return the token at the start of `input`, together with the input that
/// remains after it.
///
/// If `generic` is `false`, then the bit shift operators `>>` or `<<`
/// are valid lookahead tokens for the current parser state (see [§3.1
/// Parsing] in the WGSL specification). In other words:
///
/// -   If `generic` is `true`, then we are expecting an angle bracket
///     around a generic type parameter, like the `<` and `>` in
///     `vec3<f32>`, so interpret `<` and `>` as `Token::Paren` tokens,
///     even if they're part of `<<`, `>>`, `<=` or `>=` sequences.
///
/// -   Otherwise, interpret `<<` and `>>` as shift operators:
///     `Token::ShiftOperation` tokens.
///
/// If `ignore_doc_comments` is true, doc comments are treated as [`Token::Trivia`].
///
/// Empty input yields [`Token::End`]. A block comment that is never closed
/// also yields [`Token::End`] with an empty remainder.
///
/// [§3.1 Parsing]: https://gpuweb.github.io/gpuweb/wgsl/#parsing
fn consume_token(input: &str, generic: bool, ignore_doc_comments: bool) -> (Token<'_>, &str) {
    let mut chars = input.chars();
    let cur = match chars.next() {
        Some(c) => c,
        None => return (Token::End, ""),
    };
    match cur {
        ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()),
        '.' => {
            // Remember the position right after the `.` so we can back up if
            // this is not the start of a number.
            let og_chars = chars.as_str();
            match chars.next() {
                // `.5`-style literal: lex the number from the `.` itself.
                Some('0'..='9') => consume_number(input),
                _ => (Token::Separator(cur), og_chars),
            }
        }
        '@' => (Token::Attribute, chars.as_str()),
        '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()),
        '<' | '>' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `<=` / `>=`
                Some('=') if !generic => (Token::LogicalOperation(cur), chars.as_str()),
                // `<<` / `>>`, possibly followed by `=`
                Some(c) if c == cur && !generic => {
                    let og_chars = chars.as_str();
                    match chars.next() {
                        Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                        _ => (Token::ShiftOperation(cur), og_chars),
                    }
                }
                // A lone angle bracket, or any angle bracket in generic mode.
                _ => (Token::Paren(cur), og_chars),
            }
        }
        '0'..='9' => consume_number(input),
        '/' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // Line comment.
                Some('/') => {
                    // Scan from the start of `input` for the line break that
                    // ends the comment. `find_map` consumes the line break, so
                    // `input_chars.as_str()` below is the text following it.
                    let mut input_chars = input.char_indices();
                    let doc_comment_end = input_chars
                        .find_map(|(index, c)| is_comment_end(c).then_some(index))
                        .unwrap_or(input.len());
                    // The third character distinguishes `///` and `//!` from
                    // an ordinary `//` comment.
                    let token = match chars.next() {
                        Some('/') if !ignore_doc_comments => {
                            Token::DocComment(&input[..doc_comment_end])
                        }
                        Some('!') if !ignore_doc_comments => {
                            Token::ModuleDocComment(&input[..doc_comment_end])
                        }
                        _ => Token::Trivia,
                    };
                    (token, input_chars.as_str())
                }
                // Block comment, which may nest.
                Some('*') => {
                    let next_c = chars.next();

                    enum CommentType {
                        Doc,
                        ModuleDoc,
                        Normal,
                    }
                    // The third character distinguishes `/**` and `/*!` from
                    // an ordinary `/*` comment.
                    let comment_type = match next_c {
                        Some('*') if !ignore_doc_comments => CommentType::Doc,
                        Some('!') if !ignore_doc_comments => CommentType::ModuleDoc,
                        _ => CommentType::Normal,
                    };

                    // Nesting depth of open `/*` delimiters.
                    let mut depth = 1;
                    // The previous character, or `None` right after a
                    // delimiter so that its second character is not reused as
                    // the first character of another delimiter.
                    let mut prev = next_c;

                    for c in &mut chars {
                        match (prev, c) {
                            (Some('*'), '/') => {
                                prev = None;
                                depth -= 1;
                                if depth == 0 {
                                    let rest = chars.as_str();
                                    let token = match comment_type {
                                        CommentType::Doc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::DocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::ModuleDoc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::ModuleDocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::Normal => Token::Trivia,
                                    };
                                    return (token, rest);
                                }
                            }
                            (Some('/'), '*') => {
                                prev = None;
                                depth += 1;
                            }
                            _ => {
                                prev = Some(c);
                            }
                        }
                    }

                    // Input ran out before the comment was closed.
                    (Token::End, "")
                }
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '-' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('>') => (Token::Arrow, chars.as_str()),
                Some('-') => (Token::DecrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '+' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('+') => (Token::IncrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '*' | '%' | '^' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '~' => (Token::Operation(cur), chars.as_str()),
        '=' | '!' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `==` / `!=`
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '&' | '|' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `&&` / `||`
                Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        _ if is_blankspace(cur) => {
            // Swallow the whole run of blankspace as a single trivia token.
            let (_, rest) = consume_any(input, is_blankspace);
            (Token::Trivia, rest)
        }
        _ if is_word_start(cur) => {
            let (word, rest) = consume_any(input, is_word_part);
            (Token::Word(word), rest)
        }
        _ => (Token::Unknown(cur), chars.as_str()),
    }
}
211
/// Returns whether `c` is a line break, i.e. a character that terminates a
/// line comment
/// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F)
/// <https://www.w3.org/TR/WGSL/#line-break>
const fn is_comment_end(c: char) -> bool {
    matches!(
        c,
        '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
221
/// Returns whether `c` is blankspace (Unicode Pattern_White_Space).
const fn is_blankspace(c: char) -> bool {
    matches!(
        c,
        '\u{0020}'
            | '\u{0009}'..='\u{000d}'
            | '\u{0085}'
            | '\u{200e}'
            | '\u{200f}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
235
/// Returns whether or not a char is a word start (Unicode XID_Start + '_')
///
/// `_` is accepted explicitly in addition to whatever
/// `unicode_ident::is_xid_start` accepts.
fn is_word_start(c: char) -> bool {
    c == '_' || unicode_ident::is_xid_start(c)
}
240
/// Returns whether or not a char is a word part (Unicode XID_Continue)
///
/// This is the predicate used to extend a word after its first character.
fn is_word_part(c: char) -> bool {
    unicode_ident::is_xid_continue(c)
}
245
/// A lexer over WGSL source text.
///
/// The lexer is `Clone`; peeking is implemented by cloning it and advancing
/// the clone.
#[derive(Clone)]
pub(in crate::front::wgsl) struct Lexer<'a> {
    /// The remaining unconsumed input.
    input: &'a str,

    /// The full original source code.
    ///
    /// We compare `input` against this to compute the lexer's current offset in
    /// the source.
    pub(in crate::front::wgsl) source: &'a str,

    /// The byte offset of the end of the most recently returned non-trivia
    /// token.
    ///
    /// This is consulted by the `span_from` function, for finding the
    /// end of the span for larger structures like expressions or
    /// statements.
    last_end_offset: usize,

    /// Whether or not to ignore doc comments.
    /// If `true`, doc comments are treated as [`Token::Trivia`].
    ignore_doc_comments: bool,

    /// The set of [enable-extensions] present in the module, determined in a pre-pass.
    ///
    /// [enable-extensions]: https://gpuweb.github.io/gpuweb/wgsl/#enable-extensions-sec
    pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
}
274
275impl<'a> Lexer<'a> {
276    pub(in crate::front::wgsl) const fn new(input: &'a str, ignore_doc_comments: bool) -> Self {
277        Lexer {
278            input,
279            source: input,
280            last_end_offset: 0,
281            enable_extensions: EnableExtensions::empty(),
282            ignore_doc_comments,
283        }
284    }
285
286    /// Check that `extension` is enabled in `self`.
287    pub(in crate::front::wgsl) fn require_enable_extension(
288        &self,
289        extension: ImplementedEnableExtension,
290        span: Span,
291    ) -> Result<'static, ()> {
292        if self.enable_extensions.contains(extension) {
293            Ok(())
294        } else {
295            Err(Box::new(Error::EnableExtensionNotEnabled {
296                kind: extension.into(),
297                span,
298            }))
299        }
300    }
301
302    /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed
303    ///
304    /// # Examples
305    /// ```ignore
306    /// let lexer = Lexer::new("5");
307    /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal);
308    /// assert_eq!(value, 5);
309    /// ```
310    #[inline]
311    pub fn capture_span<T, E>(
312        &mut self,
313        inner: impl FnOnce(&mut Self) -> core::result::Result<T, E>,
314    ) -> core::result::Result<(T, Span), E> {
315        let start = self.current_byte_offset();
316        let res = inner(self)?;
317        let end = self.current_byte_offset();
318        Ok((res, Span::from(start..end)))
319    }
320
321    pub(in crate::front::wgsl) fn start_byte_offset(&mut self) -> usize {
322        loop {
323            // Eat all trivia because `next` doesn't eat trailing trivia.
324            let (token, rest) = consume_token(self.input, false, true);
325            if let Token::Trivia = token {
326                self.input = rest;
327            } else {
328                return self.current_byte_offset();
329            }
330        }
331    }
332
333    fn peek_token_and_rest(&mut self) -> (TokenSpan<'a>, &'a str) {
334        let mut cloned = self.clone();
335        let token = cloned.next();
336        let rest = cloned.input;
337        (token, rest)
338    }
339
340    /// Collect all module doc comments until a non doc token is found.
341    pub(in crate::front::wgsl) fn accumulate_module_doc_comments(&mut self) -> Vec<&'a str> {
342        let mut doc_comments = Vec::new();
343        loop {
344            // ignore blankspace
345            self.input = consume_any(self.input, is_blankspace).1;
346
347            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
348            if let Token::ModuleDocComment(doc_comment) = token {
349                self.input = rest;
350                doc_comments.push(doc_comment);
351            } else {
352                return doc_comments;
353            }
354        }
355    }
356
357    /// Collect all doc comments until a non doc token is found.
358    pub(in crate::front::wgsl) fn accumulate_doc_comments(&mut self) -> Vec<&'a str> {
359        let mut doc_comments = Vec::new();
360        loop {
361            // ignore blankspace
362            self.input = consume_any(self.input, is_blankspace).1;
363
364            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
365            if let Token::DocComment(doc_comment) = token {
366                self.input = rest;
367                doc_comments.push(doc_comment);
368            } else {
369                return doc_comments;
370            }
371        }
372    }
373
    /// Returns how many bytes of `source` have been consumed so far, computed
    /// as the difference between the lengths of `source` and the remaining
    /// `input`.
    const fn current_byte_offset(&self) -> usize {
        self.source.len() - self.input.len()
    }
377
    /// Returns the span running from `offset` to the end of the most recently
    /// returned non-trivia token (`last_end_offset`).
    pub(in crate::front::wgsl) fn span_from(&self, offset: usize) -> Span {
        Span::from(offset..self.last_end_offset)
    }
381
    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are in a parse state where bit shift operators may
    /// occur, but not angle brackets.
    ///
    /// Doc comments are skipped as trivia.
    #[must_use]
    pub(in crate::front::wgsl) fn next(&mut self) -> TokenSpan<'a> {
        self.next_impl(false, true)
    }
390
    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are in a parse state where angle brackets may occur,
    /// but not bit shift operators.
    ///
    /// Doc comments are skipped as trivia.
    #[must_use]
    pub(in crate::front::wgsl) fn next_generic(&mut self) -> TokenSpan<'a> {
        self.next_impl(true, true)
    }
399
    /// Test-only variant of `next` that does not itself request doc comments
    /// to be ignored.
    ///
    /// Doc comments are still skipped if the lexer was constructed with
    /// `ignore_doc_comments` set, because `next_impl` combines both flags.
    #[cfg(test)]
    pub fn next_with_unignored_doc_comments(&mut self) -> TokenSpan<'a> {
        self.next_impl(false, false)
    }
404
405    /// Return the next non-whitespace token from `self`, with a span.
406    ///
407    /// See [`consume_token`] for the meaning of `generic`.
408    fn next_impl(&mut self, generic: bool, ignore_doc_comments: bool) -> TokenSpan<'a> {
409        let mut start_byte_offset = self.current_byte_offset();
410        loop {
411            let (token, rest) = consume_token(
412                self.input,
413                generic,
414                ignore_doc_comments || self.ignore_doc_comments,
415            );
416            self.input = rest;
417            match token {
418                Token::Trivia => start_byte_offset = self.current_byte_offset(),
419                _ => {
420                    self.last_end_offset = self.current_byte_offset();
421                    return (token, self.span_from(start_byte_offset));
422                }
423            }
424        }
425    }
426
427    #[must_use]
428    pub(in crate::front::wgsl) fn peek(&mut self) -> TokenSpan<'a> {
429        let (token, _) = self.peek_token_and_rest();
430        token
431    }
432
433    pub(in crate::front::wgsl) fn expect_span(&mut self, expected: Token<'a>) -> Result<'a, Span> {
434        let next = self.next();
435        if next.0 == expected {
436            Ok(next.1)
437        } else {
438            Err(Box::new(Error::Unexpected(
439                next.1,
440                ExpectedToken::Token(expected),
441            )))
442        }
443    }
444
445    pub(in crate::front::wgsl) fn expect(&mut self, expected: Token<'a>) -> Result<'a, ()> {
446        self.expect_span(expected)?;
447        Ok(())
448    }
449
450    pub(in crate::front::wgsl) fn expect_generic_paren(
451        &mut self,
452        expected: char,
453    ) -> Result<'a, ()> {
454        let next = self.next_generic();
455        if next.0 == Token::Paren(expected) {
456            Ok(())
457        } else {
458            Err(Box::new(Error::Unexpected(
459                next.1,
460                ExpectedToken::Token(Token::Paren(expected)),
461            )))
462        }
463    }
464
465    pub(in crate::front::wgsl) fn end_of_generic_arguments(&mut self) -> bool {
466        self.skip(Token::Separator(',')) && self.peek().0 != Token::Paren('>')
467    }
468
469    /// If the next token matches it is skipped and true is returned
470    pub(in crate::front::wgsl) fn skip(&mut self, what: Token<'_>) -> bool {
471        let (peeked_token, rest) = self.peek_token_and_rest();
472        if peeked_token.0 == what {
473            self.input = rest;
474            true
475        } else {
476            false
477        }
478    }
479
480    pub(in crate::front::wgsl) fn next_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
481        match self.next() {
482            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
483            other => Err(Box::new(Error::Unexpected(
484                other.1,
485                ExpectedToken::Identifier,
486            ))),
487        }
488    }
489
490    pub(in crate::front::wgsl) fn peek_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
491        match self.peek() {
492            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
493            other => Err(Box::new(Error::Unexpected(
494                other.1,
495                ExpectedToken::Identifier,
496            ))),
497        }
498    }
499
500    fn word_as_ident_with_span(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
501        match word {
502            "_" => Err(Box::new(Error::InvalidIdentifierUnderscore(span))),
503            word if word.starts_with("__") => Err(Box::new(Error::ReservedIdentifierPrefix(span))),
504            word => Ok((word, span)),
505        }
506    }
507
508    pub(in crate::front::wgsl) fn next_ident(&mut self) -> Result<'a, super::ast::Ident<'a>> {
509        self.next_ident_with_span()
510            .and_then(|(word, span)| Self::word_as_ident(word, span))
511            .map(|(name, span)| super::ast::Ident { name, span })
512    }
513
514    fn word_as_ident(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
515        if crate::keywords::wgsl::RESERVED.contains(&word) {
516            Err(Box::new(Error::ReservedKeyword(span)))
517        } else {
518            Ok((word, span))
519        }
520    }
521
522    /// Parses a generic scalar type, for example `<f32>`.
523    pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
524        self.expect_generic_paren('<')?;
525        let (scalar, _span) = match self.next() {
526            (Token::Word(word), span) => {
527                conv::get_scalar_type(&self.enable_extensions, span, word)?
528                    .map(|scalar| (scalar, span))
529                    .ok_or(Error::UnknownScalarType(span))?
530            }
531            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
532        };
533
534        self.expect_generic_paren('>')?;
535        Ok(scalar)
536    }
537
538    /// Parses a generic scalar type, for example `<f32>`.
539    ///
540    /// Returns the span covering the inner type, excluding the brackets.
541    pub(in crate::front::wgsl) fn next_scalar_generic_with_span(
542        &mut self,
543    ) -> Result<'a, (Scalar, Span)> {
544        self.expect_generic_paren('<')?;
545
546        let (scalar, span) = match self.next() {
547            (Token::Word(word), span) => {
548                conv::get_scalar_type(&self.enable_extensions, span, word)?
549                    .map(|scalar| (scalar, span))
550                    .ok_or(Error::UnknownScalarType(span))?
551            }
552            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
553        };
554
555        self.expect_generic_paren('>')?;
556        Ok((scalar, span))
557    }
558
559    pub(in crate::front::wgsl) fn next_storage_access(
560        &mut self,
561    ) -> Result<'a, crate::StorageAccess> {
562        let (ident, span) = self.next_ident_with_span()?;
563        match ident {
564            "read" => Ok(crate::StorageAccess::LOAD),
565            "write" => Ok(crate::StorageAccess::STORE),
566            "read_write" => Ok(crate::StorageAccess::LOAD | crate::StorageAccess::STORE),
567            "atomic" => Ok(crate::StorageAccess::ATOMIC
568                | crate::StorageAccess::LOAD
569                | crate::StorageAccess::STORE),
570            _ => Err(Box::new(Error::UnknownAccess(span))),
571        }
572    }
573
574    pub(in crate::front::wgsl) fn next_format_generic(
575        &mut self,
576    ) -> Result<'a, (crate::StorageFormat, crate::StorageAccess)> {
577        self.expect(Token::Paren('<'))?;
578        let (ident, ident_span) = self.next_ident_with_span()?;
579        let format = conv::map_storage_format(ident, ident_span)?;
580        self.expect(Token::Separator(','))?;
581        let access = self.next_storage_access()?;
582        self.expect(Token::Paren('>'))?;
583        Ok((format, access))
584    }
585
586    pub(in crate::front::wgsl) fn next_acceleration_structure_flags(&mut self) -> Result<'a, bool> {
587        Ok(if self.skip(Token::Paren('<')) {
588            if !self.skip(Token::Paren('>')) {
589                let (name, span) = self.next_ident_with_span()?;
590                let ret = if name == "vertex_return" {
591                    true
592                } else {
593                    return Err(Box::new(Error::UnknownAttribute(span)));
594                };
595                self.skip(Token::Separator(','));
596                self.expect(Token::Paren('>'))?;
597                ret
598            } else {
599                false
600            }
601        } else {
602            false
603        })
604    }
605
606    pub(in crate::front::wgsl) fn next_cooperative_role(
607        &mut self,
608    ) -> Result<'a, crate::CooperativeRole> {
609        let (ident, span) = self.next_ident_with_span()?;
610        match ident {
611            "A" => Ok(crate::CooperativeRole::A),
612            "B" => Ok(crate::CooperativeRole::B),
613            "C" => Ok(crate::CooperativeRole::C),
614            _ => Err(Box::new(Error::UnknownAccess(span))),
615        }
616    }
617
    /// Consumes the `(` that opens an argument list.
    ///
    /// Fails with the error from `expect` if the next token is not `(`.
    pub(in crate::front::wgsl) fn open_arguments(&mut self) -> Result<'a, ()> {
        self.expect(Token::Paren('('))
    }
621
    /// Consumes an optional trailing `,` followed by the `)` that closes an
    /// argument list.
    ///
    /// Fails with the error from `expect` if `)` does not follow.
    pub(in crate::front::wgsl) fn close_arguments(&mut self) -> Result<'a, ()> {
        // A trailing comma is allowed, so whether one was present is irrelevant.
        let _ = self.skip(Token::Separator(','));
        self.expect(Token::Paren(')'))
    }
626
627    pub(in crate::front::wgsl) fn next_argument(&mut self) -> Result<'a, bool> {
628        let paren = Token::Paren(')');
629        if self.skip(Token::Separator(',')) {
630            Ok(!self.skip(paren))
631        } else {
632            self.expect(paren).map(|()| false)
633        }
634    }
635}
636
/// Lexes `source` with doc comments ignored and asserts that it yields exactly
/// `expected_tokens`, followed by [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test(source: &str, expected_tokens: &[Token]) {
    sub_test_with(true, source, expected_tokens);
}
642
/// Checks `source` twice: once with doc comments kept, against
/// `expected_tokens` as given, and once with doc comments ignored, against
/// `expected_tokens` with every doc-comment token removed.
#[cfg(test)]
#[track_caller]
fn sub_test_with_and_without_doc_comments(source: &str, expected_tokens: &[Token]) {
    sub_test_with(false, source, expected_tokens);

    let without_doc_comments: Vec<_> = expected_tokens
        .iter()
        .copied()
        .filter(|token| !matches!(token, Token::DocComment(_) | Token::ModuleDocComment(_)))
        .collect();
    sub_test_with(true, source, &without_doc_comments);
}
658
/// Lexes `source` with the given `ignore_doc_comments` setting and asserts
/// that the tokens match `expected_tokens` in order, followed by
/// [`Token::End`].
#[cfg(test)]
#[track_caller]
fn sub_test_with(ignore_doc_comments: bool, source: &str, expected_tokens: &[Token]) {
    let mut lexer = Lexer::new(source, ignore_doc_comments);
    for expected in expected_tokens {
        let (actual, _span) = lexer.next_with_unignored_doc_comments();
        assert_eq!(actual, *expected);
    }
    let (last, _span) = lexer.next();
    assert_eq!(last, Token::End);
}
668
/// Checks lexing of numeric literals: the WGSL spec examples, the minimum and
/// maximum representable integers of each type, and `f32` boundary values in
/// both decimal and hexadecimal notation.
#[test]
fn test_numbers() {
    use half::f16;
    // WGSL spec examples //

    // integer literals (decimal and hexadecimal)
    sub_test(
        "0x123 0X123u 1u 123 0 0i 0x3f",
        &[
            Token::Number(Ok(Number::AbstractInt(291))),
            Token::Number(Ok(Number::U32(291))),
            Token::Number(Ok(Number::U32(1))),
            Token::Number(Ok(Number::AbstractInt(123))),
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::AbstractInt(63))),
        ],
    );
    // floating point literals (decimal and hexadecimal)
    sub_test(
        "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h",
        &[
            Token::Number(Ok(Number::F32(0.))),
            Token::Number(Ok(Number::AbstractFloat(1.))),
            Token::Number(Ok(Number::AbstractFloat(0.01))),
            Token::Number(Ok(Number::AbstractFloat(12.34))),
            Token::Number(Ok(Number::F32(0.))),
            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
            Token::Number(Ok(Number::AbstractFloat(0.001))),
            Token::Number(Ok(Number::AbstractFloat(43.75))),
            Token::Number(Ok(Number::F32(16.))),
            Token::Number(Ok(Number::AbstractFloat(0.1875))),
            // https://github.com/gfx-rs/wgpu/issues/7046
            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
            Token::Number(Ok(Number::AbstractFloat(0.12109375))),
            // https://github.com/gfx-rs/wgpu/issues/7046
            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
        ],
    );

    // MIN / MAX //

    // min / max decimal integer
    sub_test(
        "0i 2147483647i 2147483648i",
        &[
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::I32(i32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
    // min / max decimal unsigned integer
    sub_test(
        "0u 4294967295u 4294967296u",
        &[
            Token::Number(Ok(Number::U32(u32::MIN))),
            Token::Number(Ok(Number::U32(u32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min / max hexadecimal signed integer
    sub_test(
        "0x0i 0x7FFFFFFFi 0x80000000i",
        &[
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::I32(i32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
    // min / max hexadecimal unsigned integer
    sub_test(
        "0x0u 0xFFFFFFFFu 0x100000000u",
        &[
            Token::Number(Ok(Number::U32(u32::MIN))),
            Token::Number(Ok(Number::U32(u32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min/max decimal abstract int
    sub_test(
        "0 9223372036854775807 9223372036854775808",
        &[
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min/max hexadecimal abstract int
    sub_test(
        "0 0x7fffffffffffffff 0x8000000000000000",
        &[
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    /// ≈ 2^-126 * 2^−23 (= 2^−149)
    const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45;
    /// ≈ 2^-126 * (1 − 2^−23)
    const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38;
    /// ≈ 2^-126
    const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE;
    /// ≈ 1 − 2^−24
    const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994;
    /// ≈ 1 + 2^−23
    const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001;
    /// ≈ 2^127 * (2 − 2^−23)
    const LARGEST_NORMAL_F32: f32 = f32::MAX;

    // decimal floating point
    sub_test(
        "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f 3.40282347e+38f",
        &[
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
        ],
    );
    sub_test(
        "3.40282367e+38f",
        &[
            Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128
        ],
    );

    // hexadecimal floating point
    sub_test(
        "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f 0xFFFFFFp+104f",
        &[
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
        ],
    );
    sub_test(
        "0x1p128f 0x1.000001p0f",
        &[
            Token::Number(Err(NumberError::NotRepresentable)), // = 2^128
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
}
821
/// Checks lexing of literals with the `lf` suffix, which yield `Number::F64`.
#[test]
fn double_floats() {
    sub_test(
        "0x1.2p4lf 0x1p8lf 0.0625lf 625e-4lf 10lf 10l",
        &[
            Token::Number(Ok(Number::F64(18.0))),
            Token::Number(Ok(Number::F64(256.0))),
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(10.0))),
            // A lone `l` is not a suffix: `10l` lexes as the integer `10`
            // followed by the word `l`.
            Token::Number(Ok(Number::AbstractInt(10))),
            Token::Word("l"),
        ],
    )
}
837
/// Lexes a grab bag of inputs: identifiers (ASCII and Unicode), numbers that
/// run straight into words, a dense mix of `*`, `/` and comment delimiters,
/// and hex float literals written without an exponent.
#[test]
fn test_tokens() {
    // Shorthands for the number tokens that recur in the expectations below.
    let int = |value| Token::Number(Ok(Number::AbstractInt(value)));
    let float = |value| Token::Number(Ok(Number::AbstractFloat(value)));

    sub_test("id123_OK", &[Token::Word("id123_OK")]);

    // A number stops where its digits (and any suffix) stop; whatever comes
    // next starts a fresh token.
    sub_test("92No", &[int(92), Token::Word("No")]);
    sub_test(
        "2u3o",
        &[Token::Number(Ok(Number::U32(2))), int(3), Token::Word("o")],
    );
    sub_test(
        "2.4f44po",
        &[
            Token::Number(Ok(Number::F32(2.4))),
            int(44),
            Token::Word("po"),
        ],
    );

    // Identifiers written in a variety of scripts.
    let scripts = "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ";
    let script_words = [
        Token::Word("Δέλτα"),
        Token::Word("réflexion"),
        Token::Word("Кызыл"),
        Token::Word("𐰓𐰏𐰇"),
        Token::Word("朝焼け"),
        Token::Word("سلام"),
        Token::Word("검정"),
        Token::Word("שָׁלוֹם"),
        Token::Word("गुलाबी"),
        Token::Word("փիրուզ"),
    ];
    sub_test(scripts, &script_words);

    sub_test("æNoø", &[Token::Word("æNoø")]);
    // `¾` is expected to end the word and come out as an `Unknown` token.
    sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]);
    sub_test("No好", &[Token::Word("No好")]);
    sub_test("_No", &[Token::Word("_No")]);

    // After the leading `*`, the span `/*/***/*/` yields no token at all;
    // then come `/=`, the doc comment `/*****/`, and a final `/`.
    let slashes_and_stars = [
        Token::Operation('*'),
        Token::AssignmentOperation('/'),
        Token::DocComment("/*****/"),
        Token::Operation('/'),
    ];
    sub_test_with_and_without_doc_comments("*/*/***/*//=/*****//", &slashes_and_stars);

    // Type suffixes are only allowed on hex float literals
    // if you provided an exponent.
    //
    // The 'f' suffixes are taken as a hex digit:
    // the fractional part is 0x2f / 256.
    let with_f_digit = float(1.0 + 0x2f as f64 / 256.0);
    // With `h`, `H` or `lf` after it, the literal is expected to end at
    // `0x1.2` and the letters to follow as a separate word.
    let without_suffix = float(1.125);
    sub_test(
        "0x1.2f 0x1.2f 0x1.2h 0x1.2H 0x1.2lf",
        &[
            with_f_digit,
            with_f_digit,
            without_suffix,
            Token::Word("h"),
            without_suffix,
            Token::Word("H"),
            without_suffix,
            Token::Word("lf"),
        ],
    );
}
912
/// Lexes two `var` declarations, one with irregular spacing and a leading
/// attribute, one compact, and checks the token stream of each.
///
/// The angle brackets are expected to come out as `Token::Paren` in both.
#[test]
fn test_variable_decl() {
    // Tokens shared by both declarations.
    let var = Token::Word("var");
    let lt = Token::Paren('<');
    let gt = Token::Paren('>');
    let colon = Token::Separator(':');
    let semicolon = Token::Separator(';');

    // Stray whitespace around brackets and after the colon produces no
    // tokens of its own.
    let spaced = "@group(0 ) var< uniform> texture:   texture_multisampled_2d <f32 >;";
    let spaced_tokens = [
        Token::Attribute,
        Token::Word("group"),
        Token::Paren('('),
        Token::Number(Ok(Number::AbstractInt(0))),
        Token::Paren(')'),
        var,
        lt,
        Token::Word("uniform"),
        gt,
        Token::Word("texture"),
        colon,
        Token::Word("texture_multisampled_2d"),
        lt,
        Token::Word("f32"),
        gt,
        semicolon,
    ];
    sub_test(spaced, &spaced_tokens);

    // Compact form with a two-entry template list.
    let compact = "var<storage,read_write> buffer: array<u32>;";
    let compact_tokens = [
        var,
        lt,
        Token::Word("storage"),
        Token::Separator(','),
        Token::Word("read_write"),
        gt,
        Token::Word("buffer"),
        colon,
        Token::Word("array"),
        lt,
        Token::Word("u32"),
        gt,
        semicolon,
    ];
    sub_test(compact, &compact_tokens);
}
955
/// Checks that inputs consisting only of ordinary (non-doc) comments
/// produce no tokens.
#[test]
fn test_comments() {
    // Every case below expects an empty token stream.
    let no_tokens: &[Token<'_>] = &[];

    // Line comment.
    sub_test("// Single comment", no_tokens);

    // Block comment spanning three lines.
    sub_test("/* multi\n    line\n    comment */", no_tokens);

    // Block comment followed by a line comment on the next line.
    sub_test(
        "/* multi\n    line\n    comment */\n    // and another",
        no_tokens,
    );
}
974
/// Checks that `///` and `/** ... */` comments are reported as
/// `Token::DocComment`, carrying the full comment text including its
/// delimiters.
#[test]
fn test_doc_comments() {
    // In the first two cases the whole input is a single doc comment, so the
    // expected token text is the source itself.
    let line_doc = "/// Single comment";
    let block_doc = "/** multi\n    line\n    comment */";

    sub_test_with_and_without_doc_comments(line_doc, &[Token::DocComment(line_doc)]);
    sub_test_with_and_without_doc_comments(block_doc, &[Token::DocComment(block_doc)]);

    // A block doc comment followed by a line doc comment yields two separate
    // tokens; the newline and indentation between them belong to neither.
    let both = "/** multi\n    line\n    comment */\n    /// and another";
    let both_tokens = [
        Token::DocComment(block_doc),
        Token::DocComment("/// and another"),
    ];
    sub_test_with_and_without_doc_comments(both, &both_tokens);
}
1007
/// Checks that a block doc comment containing a nested `/** ... */` is
/// reported as one `Token::DocComment` that ends at the outer `*/`, and that
/// the declaration after it is lexed normally.
#[test]
fn test_doc_comment_nested() {
    // One literal per source line, so the indentation inside the comment is
    // explicit.
    let source = concat!(
        "/**\n",
        "    a comment with nested one /**\n",
        "        nested comment\n",
        "    */\n",
        "    */\n",
        "    const a : i32 = 2;",
    );

    let expected = [
        // Everything up to and including the outer closing delimiter.
        Token::DocComment(concat!(
            "/**\n",
            "    a comment with nested one /**\n",
            "        nested comment\n",
            "    */\n",
            "    */",
        )),
        Token::Word("const"),
        Token::Word("a"),
        Token::Separator(':'),
        Token::Word("i32"),
        Token::Operation('='),
        Token::Number(Ok(Number::AbstractInt(2))),
        Token::Separator(';'),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}
1035
/// Checks that `///` comments containing multi-byte characters (Greek
/// letters, mathematical bold letters, box-drawing characters, subscripts
/// and superscripts) are reported with their text intact, one
/// `Token::DocComment` per line.
#[test]
fn test_doc_comment_long_character() {
    // One literal per source line. The three comment lines start at
    // different columns; that leading whitespace is not part of the
    // expected comment text.
    let source = concat!(
        "/// π/2\n",
        "        ///     D(𝐡) = ───────────────────────────────────────────────────\n",
        "///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`\n",
        "    const a : i32 = 2;",
    );

    let expected = [
        Token::DocComment("/// π/2"),
        Token::DocComment("///     D(𝐡) = ───────────────────────────────────────────────────"),
        Token::DocComment("///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`"),
        Token::Word("const"),
        Token::Word("a"),
        Token::Separator(':'),
        Token::Word("i32"),
        Token::Operation('='),
        Token::Number(Ok(Number::AbstractInt(2))),
        Token::Separator(';'),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}
1057
/// Checks which `//!` and `/*! ... */` comments are reported as
/// `Token::ModuleDocComment`.
///
/// Per the expectations below, module doc comments are still recognised
/// after an item doc comment, a regular comment, or a blank line, but not
/// once a non-comment token (`const`) has been seen.
#[test]
fn test_doc_comments_module() {
    // One literal per source line; the lone "\n" is the blank line.
    let source = concat!(
        "//! Comment Module\n",
        "        //! Another one.\n",
        "        /*! Different module comment */\n",
        "        /// Trying to break module comment\n",
        "        // Trying to break module comment again\n",
        "        //! After a regular comment is ok.\n",
        "        /*! Different module comment again */\n",
        "\n",
        "        //! After a break is supported.\n",
        "        const\n",
        "        //! After anything else is not.",
    );

    let expected = [
        Token::ModuleDocComment("//! Comment Module"),
        Token::ModuleDocComment("//! Another one."),
        Token::ModuleDocComment("/*! Different module comment */"),
        Token::DocComment("/// Trying to break module comment"),
        // The regular `//` comment produces no token.
        Token::ModuleDocComment("//! After a regular comment is ok."),
        Token::ModuleDocComment("/*! Different module comment again */"),
        Token::ModuleDocComment("//! After a break is supported."),
        // The `//!` after `const` produces no token.
        Token::Word("const"),
    ];

    sub_test_with_and_without_doc_comments(source, &expected);
}