// naga/front/wgsl/parse/lexer.rs
use super::{number::consume_number, Error, ExpectedToken, Result};
use crate::front::wgsl::error::NumberError;
use crate::front::wgsl::parse::directive::enable_extension::EnableExtensions;
use crate::front::wgsl::parse::{conv, Number};
use crate::front::wgsl::Scalar;
use crate::Span;

use alloc::{boxed::Box, vec::Vec};
/// A token paired with the source span it was lexed from.
type TokenSpan<'a> = (Token<'a>, Span);
11
/// A single WGSL token, borrowing its text from the source string.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Token<'a> {
    /// `:`, `;`, `,`, or a `.` not starting a number.
    Separator(char),
    /// A bracketing character: `(`, `)`, `{`, `}`, `[`, `]`, or `<`/`>`
    /// when lexed as generic-argument brackets.
    Paren(char),
    /// `@`, introducing an attribute.
    Attribute,
    /// A numeric literal, or the error produced while parsing one.
    Number(core::result::Result<Number, NumberError>),
    /// An identifier or keyword.
    Word(&'a str),
    /// A single-character operator such as `+`, `-`, `*`, `/`, `~`.
    Operation(char),
    /// A comparison or logical operator, identified by its first char:
    /// `<=`, `>=`, `==`, `!=`, `&&`, `||`.
    LogicalOperation(char),
    /// `<<` or `>>`, identified by its first char.
    ShiftOperation(char),
    /// A compound assignment such as `+=`, `<<=`, identified by its first char.
    AssignmentOperation(char),
    /// `++`
    IncrementOperation,
    /// `--`
    DecrementOperation,
    /// `->`
    Arrow,
    /// A character the lexer does not recognize.
    Unknown(char),
    /// Blankspace, or a comment that is not surfaced as a doc comment.
    Trivia,
    /// A `///` or `/** … */` doc comment, including its delimiters.
    DocComment(&'a str),
    /// A `//!` or `/*! … */` module doc comment, including its delimiters.
    ModuleDocComment(&'a str),
    /// End of input.
    End,
}
32
/// Split `input` at the end of its longest prefix whose characters all
/// satisfy `what`, returning `(matching_prefix, remainder)`.
fn consume_any(input: &str, what: impl Fn(char) -> bool) -> (&str, &str) {
    let boundary = input
        .char_indices()
        .find(|&(_, c)| !what(c))
        .map_or(input.len(), |(idx, _)| idx);
    input.split_at(boundary)
}
37
/// Return the token at the start of `input`.
///
/// If `generic` is `false`, then the bit shift operators `>>` or `<<`
/// are valid lookahead tokens for the current parser state (see [§3.1
/// Parsing] in the WGSL specification). In other words:
///
/// -   If `generic` is `true`, then we are expecting an angle bracket
///     around a generic type parameter, like the `<` and `>` in
///     `vec3<f32>`, so interpret `<` and `>` as `Token::Paren` tokens,
///     even if they're part of `<<` or `>>` sequences.
///
/// -   Otherwise, interpret `<<` and `>>` as shift operators:
///     `Token::LogicalOperation` tokens.
///
/// If `ignore_doc_comments` is true, doc comments are treated as [`Token::Trivia`].
///
/// Returns the token and the remainder of `input` after it.
///
/// [§3.1 Parsing]: https://gpuweb.github.io/gpuweb/wgsl/#parsing
fn consume_token(input: &str, generic: bool, ignore_doc_comments: bool) -> (Token<'_>, &str) {
    let mut chars = input.chars();
    let cur = match chars.next() {
        Some(c) => c,
        None => return (Token::End, ""),
    };
    match cur {
        ':' | ';' | ',' => (Token::Separator(cur), chars.as_str()),
        '.' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // A `.` immediately followed by a digit starts a float
                // literal like `.5`; re-lex from the `.` as a number.
                Some('0'..='9') => consume_number(input),
                _ => (Token::Separator(cur), og_chars),
            }
        }
        '@' => (Token::Attribute, chars.as_str()),
        '(' | ')' | '{' | '}' | '[' | ']' => (Token::Paren(cur), chars.as_str()),
        '<' | '>' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `<=` / `>=`, unless we're lexing generic brackets.
                Some('=') if !generic => (Token::LogicalOperation(cur), chars.as_str()),
                // `<<` / `>>`, possibly followed by `=` for `<<=` / `>>=`.
                Some(c) if c == cur && !generic => {
                    let og_chars = chars.as_str();
                    match chars.next() {
                        Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                        _ => (Token::ShiftOperation(cur), og_chars),
                    }
                }
                // In generic position (or with no special follower), a
                // lone `<` / `>` is an angle bracket.
                _ => (Token::Paren(cur), og_chars),
            }
        }
        '0'..='9' => consume_number(input),
        '/' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // Line comment: `//…`, `///…` (doc), or `//!…` (module doc).
                Some('/') => {
                    // Scan the full input for the line break that ends the
                    // comment; the line break itself is consumed with it.
                    let mut input_chars = input.char_indices();
                    let doc_comment_end = input_chars
                        .find_map(|(index, c)| is_comment_end(c).then_some(index))
                        .unwrap_or(input.len());
                    // `chars` already consumed the two slashes; its next
                    // char decides the comment kind.
                    let token = match chars.next() {
                        Some('/') if !ignore_doc_comments => {
                            Token::DocComment(&input[..doc_comment_end])
                        }
                        Some('!') if !ignore_doc_comments => {
                            Token::ModuleDocComment(&input[..doc_comment_end])
                        }
                        _ => Token::Trivia,
                    };
                    (token, input_chars.as_str())
                }
                // Block comment: `/*…*/`, `/**…*/` (doc), or `/*!…*/`
                // (module doc). Nesting is supported via `depth`.
                Some('*') => {
                    let next_c = chars.next();

                    enum CommentType {
                        Doc,
                        ModuleDoc,
                        Normal,
                    }
                    let comment_type = match next_c {
                        Some('*') if !ignore_doc_comments => CommentType::Doc,
                        Some('!') if !ignore_doc_comments => CommentType::ModuleDoc,
                        _ => CommentType::Normal,
                    };

                    let mut depth = 1;
                    let mut prev = next_c;

                    for c in &mut chars {
                        match (prev, c) {
                            (Some('*'), '/') => {
                                // `prev` is cleared so the `/` of a `*/`
                                // can't also start a new `/*`.
                                prev = None;
                                depth -= 1;
                                if depth == 0 {
                                    let rest = chars.as_str();
                                    let token = match comment_type {
                                        CommentType::Doc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::DocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::ModuleDoc => {
                                            let doc_comment_end = input.len() - rest.len();
                                            Token::ModuleDocComment(&input[..doc_comment_end])
                                        }
                                        CommentType::Normal => Token::Trivia,
                                    };
                                    return (token, rest);
                                }
                            }
                            (Some('/'), '*') => {
                                prev = None;
                                depth += 1;
                            }
                            _ => {
                                prev = Some(c);
                            }
                        }
                    }

                    // Unterminated block comment: the rest of the input is
                    // swallowed and the stream simply ends.
                    // NOTE(review): no error is reported here — confirm
                    // that's intentional.
                    (Token::End, "")
                }
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '-' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('>') => (Token::Arrow, chars.as_str()),
                Some('-') => (Token::DecrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '+' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('+') => (Token::IncrementOperation, chars.as_str()),
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '*' | '%' | '^' => {
            let og_chars = chars.as_str();
            match chars.next() {
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '~' => (Token::Operation(cur), chars.as_str()),
        '=' | '!' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `==` / `!=`
                Some('=') => (Token::LogicalOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        '&' | '|' => {
            let og_chars = chars.as_str();
            match chars.next() {
                // `&&` / `||`
                Some(c) if c == cur => (Token::LogicalOperation(cur), chars.as_str()),
                // `&=` / `|=`
                Some('=') => (Token::AssignmentOperation(cur), chars.as_str()),
                _ => (Token::Operation(cur), og_chars),
            }
        }
        _ if is_blankspace(cur) => {
            // Collapse a run of blankspace into a single trivia token.
            let (_, rest) = consume_any(input, is_blankspace);
            (Token::Trivia, rest)
        }
        _ if is_word_start(cur) => {
            let (word, rest) = consume_any(input, is_word_part);
            (Token::Word(word), rest)
        }
        _ => (Token::Unknown(cur), chars.as_str()),
    }
}
211
/// Returns whether or not a char is a comment end
/// (Unicode Pattern_White_Space excluding U+0020, U+0009, U+200E and U+200F)
/// <https://www.w3.org/TR/WGSL/#line-break>
const fn is_comment_end(c: char) -> bool {
    matches!(
        c,
        '\u{000a}'..='\u{000d}' | '\u{0085}' | '\u{2028}' | '\u{2029}'
    )
}
221
/// Returns whether or not a char is a blankspace (Unicode Pattern_White_Space)
const fn is_blankspace(c: char) -> bool {
    matches!(
        c,
        '\u{0020}'
            | '\u{0009}'..='\u{000d}'
            | '\u{0085}'
            | '\u{200e}'
            | '\u{200f}'
            | '\u{2028}'
            | '\u{2029}'
    )
}
235
236/// Returns whether or not a char is a word start (Unicode XID_Start + '_')
237fn is_word_start(c: char) -> bool {
238    c == '_' || unicode_ident::is_xid_start(c)
239}
240
/// Returns whether or not a char is a word part (Unicode XID_Continue)
fn is_word_part(c: char) -> bool {
    unicode_ident::is_xid_continue(c)
}
245
/// A lexer over WGSL source text.
///
/// Cloning a `Lexer` is cheap (it holds only slices and small state), which
/// is how lookahead is implemented (see `peek_token_and_rest`).
#[derive(Clone)]
pub(in crate::front::wgsl) struct Lexer<'a> {
    /// The remaining unconsumed input.
    input: &'a str,

    /// The full original source code.
    ///
    /// We compare `input` against this to compute the lexer's current offset in
    /// the source.
    pub(in crate::front::wgsl) source: &'a str,

    /// The byte offset of the end of the most recently returned non-trivia
    /// token.
    ///
    /// This is consulted by the `span_from` function, for finding the
    /// end of the span for larger structures like expressions or
    /// statements.
    last_end_offset: usize,

    /// Whether or not to ignore doc comments.
    /// If `true`, doc comments are treated as [`Token::Trivia`].
    ignore_doc_comments: bool,

    /// The enable extensions in effect; consulted when resolving scalar
    /// type names (see `next_scalar_generic`).
    pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
}
271
impl<'a> Lexer<'a> {
    /// Create a lexer positioned at the start of `input`, with no enable
    /// extensions active.
    pub(in crate::front::wgsl) const fn new(input: &'a str, ignore_doc_comments: bool) -> Self {
        Lexer {
            input,
            source: input,
            last_end_offset: 0,
            enable_extensions: EnableExtensions::empty(),
            ignore_doc_comments,
        }
    }

    /// Calls the function with a lexer and returns the result of the function as well as the span for everything the function parsed
    ///
    /// # Examples
    /// ```ignore
    /// let lexer = Lexer::new("5");
    /// let (value, span) = lexer.capture_span(Lexer::next_uint_literal);
    /// assert_eq!(value, 5);
    /// ```
    #[inline]
    pub fn capture_span<T, E>(
        &mut self,
        inner: impl FnOnce(&mut Self) -> core::result::Result<T, E>,
    ) -> core::result::Result<(T, Span), E> {
        let start = self.current_byte_offset();
        let res = inner(self)?;
        let end = self.current_byte_offset();
        Ok((res, Span::from(start..end)))
    }

    /// Skip any leading trivia and return the byte offset at which the next
    /// real token starts.
    pub(in crate::front::wgsl) fn start_byte_offset(&mut self) -> usize {
        loop {
            // Eat all trivia because `next` doesn't eat trailing trivia.
            let (token, rest) = consume_token(self.input, false, true);
            if let Token::Trivia = token {
                self.input = rest;
            } else {
                return self.current_byte_offset();
            }
        }
    }

    /// Return the next token (with its span) and the remaining input after
    /// it, without advancing `self`. Works on a clone of the lexer.
    fn peek_token_and_rest(&mut self) -> (TokenSpan<'a>, &'a str) {
        let mut cloned = self.clone();
        let token = cloned.next();
        let rest = cloned.input;
        (token, rest)
    }

    /// Collect all module doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_module_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::ModuleDocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                // The non-doc token is not consumed; only its lookahead
                // was computed on `self.input`.
                return doc_comments;
            }
        }
    }

    /// Collect all doc comments until a non doc token is found.
    pub(in crate::front::wgsl) fn accumulate_doc_comments(&mut self) -> Vec<&'a str> {
        let mut doc_comments = Vec::new();
        loop {
            // ignore blankspace
            self.input = consume_any(self.input, is_blankspace).1;

            let (token, rest) = consume_token(self.input, false, self.ignore_doc_comments);
            if let Token::DocComment(doc_comment) = token {
                self.input = rest;
                doc_comments.push(doc_comment);
            } else {
                return doc_comments;
            }
        }
    }

    /// Byte offset of the lexer's current position within `source`.
    const fn current_byte_offset(&self) -> usize {
        self.source.len() - self.input.len()
    }

    /// Return the span from `offset` to the end of the most recently
    /// returned non-trivia token.
    pub(in crate::front::wgsl) fn span_from(&self, offset: usize) -> Span {
        Span::from(offset..self.last_end_offset)
    }

    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are a parse state where bit shift operators may
    /// occur, but not angle brackets.
    #[must_use]
    pub(in crate::front::wgsl) fn next(&mut self) -> TokenSpan<'a> {
        self.next_impl(false, true)
    }

    /// Return the next non-whitespace token from `self`.
    ///
    /// Assume we are in a parse state where angle brackets may occur,
    /// but not bit shift operators.
    #[must_use]
    pub(in crate::front::wgsl) fn next_generic(&mut self) -> TokenSpan<'a> {
        self.next_impl(true, true)
    }

    /// Like [`next`](Self::next), but surfaces doc comments as tokens
    /// (unless the lexer itself was built with `ignore_doc_comments`).
    /// Used by the lexer tests.
    #[cfg(test)]
    pub fn next_with_unignored_doc_comments(&mut self) -> TokenSpan<'a> {
        self.next_impl(false, false)
    }

    /// Return the next non-whitespace token from `self`, with a span.
    ///
    /// See [`consume_token`] for the meaning of `generic`.
    fn next_impl(&mut self, generic: bool, ignore_doc_comments: bool) -> TokenSpan<'a> {
        let mut start_byte_offset = self.current_byte_offset();
        loop {
            let (token, rest) = consume_token(
                self.input,
                generic,
                // Doc comments are ignored if either the caller or the
                // lexer's configuration says so.
                ignore_doc_comments || self.ignore_doc_comments,
            );
            self.input = rest;
            match token {
                // Skip trivia, moving the span start past it.
                Token::Trivia => start_byte_offset = self.current_byte_offset(),
                _ => {
                    self.last_end_offset = self.current_byte_offset();
                    return (token, self.span_from(start_byte_offset));
                }
            }
        }
    }

    /// Return the next non-whitespace token without consuming it.
    #[must_use]
    pub(in crate::front::wgsl) fn peek(&mut self) -> TokenSpan<'a> {
        let (token, _) = self.peek_token_and_rest();
        token
    }

    /// Consume the next token and return its span if it equals `expected`,
    /// or an `Error::Unexpected` otherwise.
    pub(in crate::front::wgsl) fn expect_span(&mut self, expected: Token<'a>) -> Result<'a, Span> {
        let next = self.next();
        if next.0 == expected {
            Ok(next.1)
        } else {
            Err(Box::new(Error::Unexpected(
                next.1,
                ExpectedToken::Token(expected),
            )))
        }
    }

    /// Consume the next token, requiring it to equal `expected`.
    pub(in crate::front::wgsl) fn expect(&mut self, expected: Token<'a>) -> Result<'a, ()> {
        self.expect_span(expected)?;
        Ok(())
    }

    /// Consume the next token in generic-bracket mode (see
    /// [`next_generic`](Self::next_generic)), requiring it to be the angle
    /// bracket `expected`.
    pub(in crate::front::wgsl) fn expect_generic_paren(
        &mut self,
        expected: char,
    ) -> Result<'a, ()> {
        let next = self.next_generic();
        if next.0 == Token::Paren(expected) {
            Ok(())
        } else {
            Err(Box::new(Error::Unexpected(
                next.1,
                ExpectedToken::Token(Token::Paren(expected)),
            )))
        }
    }

    /// Skip a `,` if present and report whether another generic argument
    /// follows (i.e. the next token is not the closing `>`).
    pub(in crate::front::wgsl) fn end_of_generic_arguments(&mut self) -> bool {
        self.skip(Token::Separator(',')) && self.peek().0 != Token::Paren('>')
    }

    /// If the next token matches it is skipped and true is returned
    pub(in crate::front::wgsl) fn skip(&mut self, what: Token<'_>) -> bool {
        let (peeked_token, rest) = self.peek_token_and_rest();
        if peeked_token.0 == what {
            self.input = rest;
            true
        } else {
            false
        }
    }

    /// Consume the next token, requiring it to be a valid identifier word
    /// (not `_`, not `__`-prefixed), and return it with its span.
    pub(in crate::front::wgsl) fn next_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
        match self.next() {
            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
            other => Err(Box::new(Error::Unexpected(
                other.1,
                ExpectedToken::Identifier,
            ))),
        }
    }

    /// Like [`next_ident_with_span`](Self::next_ident_with_span), but
    /// without consuming the token.
    pub(in crate::front::wgsl) fn peek_ident_with_span(&mut self) -> Result<'a, (&'a str, Span)> {
        match self.peek() {
            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
            other => Err(Box::new(Error::Unexpected(
                other.1,
                ExpectedToken::Identifier,
            ))),
        }
    }

    /// Validate a word as an identifier: reject the single underscore and
    /// the reserved `__` prefix.
    fn word_as_ident_with_span(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
        match word {
            "_" => Err(Box::new(Error::InvalidIdentifierUnderscore(span))),
            word if word.starts_with("__") => Err(Box::new(Error::ReservedIdentifierPrefix(span))),
            word => Ok((word, span)),
        }
    }

    /// Consume the next token as an identifier, additionally rejecting
    /// reserved keywords, and wrap it in an AST `Ident`.
    pub(in crate::front::wgsl) fn next_ident(&mut self) -> Result<'a, super::ast::Ident<'a>> {
        self.next_ident_with_span()
            .and_then(|(word, span)| Self::word_as_ident(word, span))
            .map(|(name, span)| super::ast::Ident { name, span })
    }

    /// Reject words that are WGSL reserved keywords.
    fn word_as_ident(word: &'a str, span: Span) -> Result<'a, (&'a str, Span)> {
        if crate::keywords::wgsl::RESERVED.contains(&word) {
            Err(Box::new(Error::ReservedKeyword(span)))
        } else {
            Ok((word, span))
        }
    }

    /// Parses a generic scalar type, for example `<f32>`.
    pub(in crate::front::wgsl) fn next_scalar_generic(&mut self) -> Result<'a, Scalar> {
        self.expect_generic_paren('<')?;
        let (scalar, _span) = match self.next() {
            (Token::Word(word), span) => {
                conv::get_scalar_type(&self.enable_extensions, span, word)?
                    .map(|scalar| (scalar, span))
                    .ok_or(Error::UnknownScalarType(span))?
            }
            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
        };

        self.expect_generic_paren('>')?;
        Ok(scalar)
    }

    /// Parses a generic scalar type, for example `<f32>`.
    ///
    /// Returns the span covering the inner type, excluding the brackets.
    pub(in crate::front::wgsl) fn next_scalar_generic_with_span(
        &mut self,
    ) -> Result<'a, (Scalar, Span)> {
        self.expect_generic_paren('<')?;

        let (scalar, span) = match self.next() {
            (Token::Word(word), span) => {
                conv::get_scalar_type(&self.enable_extensions, span, word)?
                    .map(|scalar| (scalar, span))
                    .ok_or(Error::UnknownScalarType(span))?
            }
            (_, span) => return Err(Box::new(Error::UnknownScalarType(span))),
        };

        self.expect_generic_paren('>')?;
        Ok((scalar, span))
    }

    /// Parse a storage access identifier (`read`, `write`, `read_write`,
    /// or `atomic`) into the corresponding access flags.
    pub(in crate::front::wgsl) fn next_storage_access(
        &mut self,
    ) -> Result<'a, crate::StorageAccess> {
        let (ident, span) = self.next_ident_with_span()?;
        match ident {
            "read" => Ok(crate::StorageAccess::LOAD),
            "write" => Ok(crate::StorageAccess::STORE),
            "read_write" => Ok(crate::StorageAccess::LOAD | crate::StorageAccess::STORE),
            "atomic" => Ok(crate::StorageAccess::ATOMIC
                | crate::StorageAccess::LOAD
                | crate::StorageAccess::STORE),
            _ => Err(Box::new(Error::UnknownAccess(span))),
        }
    }

    /// Parse a storage-texture generic argument list:
    /// `<format, access>`.
    pub(in crate::front::wgsl) fn next_format_generic(
        &mut self,
    ) -> Result<'a, (crate::StorageFormat, crate::StorageAccess)> {
        self.expect(Token::Paren('<'))?;
        let (ident, ident_span) = self.next_ident_with_span()?;
        let format = conv::map_storage_format(ident, ident_span)?;
        self.expect(Token::Separator(','))?;
        let access = self.next_storage_access()?;
        self.expect(Token::Paren('>'))?;
        Ok((format, access))
    }

    /// Parse an optional acceleration-structure flag list. Returns `true`
    /// iff `<vertex_return>` (optionally with a trailing comma) was given;
    /// `<>` or no bracket at all yields `false`.
    pub(in crate::front::wgsl) fn next_acceleration_structure_flags(&mut self) -> Result<'a, bool> {
        Ok(if self.skip(Token::Paren('<')) {
            if !self.skip(Token::Paren('>')) {
                let (name, span) = self.next_ident_with_span()?;
                let ret = if name == "vertex_return" {
                    true
                } else {
                    return Err(Box::new(Error::UnknownAttribute(span)));
                };
                self.skip(Token::Separator(','));
                self.expect(Token::Paren('>'))?;
                ret
            } else {
                false
            }
        } else {
            false
        })
    }

    /// Expect the `(` that opens an argument list.
    pub(in crate::front::wgsl) fn open_arguments(&mut self) -> Result<'a, ()> {
        self.expect(Token::Paren('('))
    }

    /// Expect the `)` that closes an argument list, tolerating one
    /// trailing comma.
    pub(in crate::front::wgsl) fn close_arguments(&mut self) -> Result<'a, ()> {
        let _ = self.skip(Token::Separator(','));
        self.expect(Token::Paren(')'))
    }

    /// After an argument, consume either `,` or `)`. Returns `Ok(true)` if
    /// another argument follows, `Ok(false)` if the list ended (including
    /// via a trailing comma before `)`).
    pub(in crate::front::wgsl) fn next_argument(&mut self) -> Result<'a, bool> {
        let paren = Token::Paren(')');
        if self.skip(Token::Separator(',')) {
            Ok(!self.skip(paren))
        } else {
            self.expect(paren).map(|()| false)
        }
    }
}
605
/// Lex `source` with doc comments ignored and assert that it produces
/// exactly `expected_tokens` followed by `Token::End`.
#[cfg(test)]
#[track_caller]
fn sub_test(source: &str, expected_tokens: &[Token]) {
    sub_test_with(true, source, expected_tokens);
}
611
/// Check `source` twice: once with doc comments surfaced as tokens, and
/// once with doc comments ignored — in the latter case the doc-comment
/// tokens are dropped from the expectation.
#[cfg(test)]
#[track_caller]
fn sub_test_with_and_without_doc_comments(source: &str, expected_tokens: &[Token]) {
    sub_test_with(false, source, expected_tokens);
    let without_doc_tokens: Vec<Token> = expected_tokens
        .iter()
        .filter(|token| !matches!(**token, Token::DocComment(_) | Token::ModuleDocComment(_)))
        .copied()
        .collect();
    sub_test_with(true, source, &without_doc_tokens);
}
627
/// Drive a fresh `Lexer` over `source`, asserting each produced token
/// matches `expected_tokens` in order, then that the stream ends with
/// `Token::End`.
#[cfg(test)]
#[track_caller]
fn sub_test_with(ignore_doc_comments: bool, source: &str, expected_tokens: &[Token]) {
    let mut lexer = Lexer::new(source, ignore_doc_comments);
    for expected in expected_tokens.iter().copied() {
        assert_eq!(lexer.next_with_unignored_doc_comments().0, expected);
    }
    assert_eq!(lexer.next().0, Token::End);
}
637
/// Lexing of WGSL numeric literals: spec examples, min/max boundary
/// values per type, and subnormal/boundary floats.
#[test]
fn test_numbers() {
    use half::f16;
    // WGSL spec examples //

    // decimal integer
    sub_test(
        "0x123 0X123u 1u 123 0 0i 0x3f",
        &[
            Token::Number(Ok(Number::AbstractInt(291))),
            Token::Number(Ok(Number::U32(291))),
            Token::Number(Ok(Number::U32(1))),
            Token::Number(Ok(Number::AbstractInt(123))),
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::AbstractInt(63))),
        ],
    );
    // decimal floating point
    sub_test(
        "0.e+4f 01. .01 12.34 .0f 0h 1e-3 0xa.fp+2 0x1P+4f 0X.3 0x3p+2h 0X1.fp-4 0x3.2p+2h",
        &[
            Token::Number(Ok(Number::F32(0.))),
            Token::Number(Ok(Number::AbstractFloat(1.))),
            Token::Number(Ok(Number::AbstractFloat(0.01))),
            Token::Number(Ok(Number::AbstractFloat(12.34))),
            Token::Number(Ok(Number::F32(0.))),
            Token::Number(Ok(Number::F16(f16::from_f32(0.)))),
            Token::Number(Ok(Number::AbstractFloat(0.001))),
            Token::Number(Ok(Number::AbstractFloat(43.75))),
            Token::Number(Ok(Number::F32(16.))),
            Token::Number(Ok(Number::AbstractFloat(0.1875))),
            // https://github.com/gfx-rs/wgpu/issues/7046
            Token::Number(Err(NumberError::NotRepresentable)), // Should be 0.75
            Token::Number(Ok(Number::AbstractFloat(0.12109375))),
            // https://github.com/gfx-rs/wgpu/issues/7046
            Token::Number(Err(NumberError::NotRepresentable)), // Should be 12.5
        ],
    );

    // MIN / MAX //

    // min / max decimal integer
    sub_test(
        "0i 2147483647i 2147483648i",
        &[
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::I32(i32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
    // min / max decimal unsigned integer
    sub_test(
        "0u 4294967295u 4294967296u",
        &[
            Token::Number(Ok(Number::U32(u32::MIN))),
            Token::Number(Ok(Number::U32(u32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min / max hexadecimal signed integer
    sub_test(
        "0x0i 0x7FFFFFFFi 0x80000000i",
        &[
            Token::Number(Ok(Number::I32(0))),
            Token::Number(Ok(Number::I32(i32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
    // min / max hexadecimal unsigned integer
    sub_test(
        "0x0u 0xFFFFFFFFu 0x100000000u",
        &[
            Token::Number(Ok(Number::U32(u32::MIN))),
            Token::Number(Ok(Number::U32(u32::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min/max decimal abstract int
    sub_test(
        "0 9223372036854775807 9223372036854775808",
        &[
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    // min/max hexadecimal abstract int
    sub_test(
        "0 0x7fffffffffffffff 0x8000000000000000",
        &[
            Token::Number(Ok(Number::AbstractInt(0))),
            Token::Number(Ok(Number::AbstractInt(i64::MAX))),
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );

    /// ≈ 2^-126 * 2^−23 (= 2^−149)
    const SMALLEST_POSITIVE_SUBNORMAL_F32: f32 = 1e-45;
    /// ≈ 2^-126 * (1 − 2^−23)
    const LARGEST_SUBNORMAL_F32: f32 = 1.1754942e-38;
    /// ≈ 2^-126
    const SMALLEST_POSITIVE_NORMAL_F32: f32 = f32::MIN_POSITIVE;
    /// ≈ 1 − 2^−24
    const LARGEST_F32_LESS_THAN_ONE: f32 = 0.99999994;
    /// ≈ 1 + 2^−23
    const SMALLEST_F32_LARGER_THAN_ONE: f32 = 1.0000001;
    /// ≈ 2^127 * (2 − 2^−23)
    const LARGEST_NORMAL_F32: f32 = f32::MAX;

    // decimal floating point
    sub_test(
        "1e-45f 1.1754942e-38f 1.17549435e-38f 0.99999994f 1.0000001f 3.40282347e+38f",
        &[
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
        ],
    );
    sub_test(
        "3.40282367e+38f",
        &[
            Token::Number(Err(NumberError::NotRepresentable)), // ≈ 2^128
        ],
    );

    // hexadecimal floating point
    sub_test(
        "0x1p-149f 0x7FFFFFp-149f 0x1p-126f 0xFFFFFFp-24f 0x800001p-23f 0xFFFFFFp+104f",
        &[
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_SUBNORMAL_F32))),
            Token::Number(Ok(Number::F32(SMALLEST_POSITIVE_NORMAL_F32))),
            Token::Number(Ok(Number::F32(LARGEST_F32_LESS_THAN_ONE))),
            Token::Number(Ok(Number::F32(SMALLEST_F32_LARGER_THAN_ONE))),
            Token::Number(Ok(Number::F32(LARGEST_NORMAL_F32))),
        ],
    );
    sub_test(
        "0x1p128f 0x1.000001p0f",
        &[
            Token::Number(Err(NumberError::NotRepresentable)), // = 2^128
            Token::Number(Err(NumberError::NotRepresentable)),
        ],
    );
}
790
/// Lexing of `f64` literals: the `lf` suffix selects F64; a bare `l` is
/// not a suffix and is lexed as a separate word.
#[test]
fn double_floats() {
    sub_test(
        "0x1.2p4lf 0x1p8lf 0.0625lf 625e-4lf 10lf 10l",
        &[
            Token::Number(Ok(Number::F64(18.0))),
            Token::Number(Ok(Number::F64(256.0))),
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(0.0625))),
            Token::Number(Ok(Number::F64(10.0))),
            Token::Number(Ok(Number::AbstractInt(10))),
            Token::Word("l"),
        ],
    )
}
806
/// General tokenization: identifier/number boundaries, non-ASCII
/// identifiers, comment-vs-operator disambiguation, and hex-float suffix
/// edge cases.
#[test]
fn test_tokens() {
    sub_test("id123_OK", &[Token::Word("id123_OK")]);
    sub_test(
        "92No",
        &[
            Token::Number(Ok(Number::AbstractInt(92))),
            Token::Word("No"),
        ],
    );
    sub_test(
        "2u3o",
        &[
            Token::Number(Ok(Number::U32(2))),
            Token::Number(Ok(Number::AbstractInt(3))),
            Token::Word("o"),
        ],
    );
    sub_test(
        "2.4f44po",
        &[
            Token::Number(Ok(Number::F32(2.4))),
            Token::Number(Ok(Number::AbstractInt(44))),
            Token::Word("po"),
        ],
    );
    sub_test(
        "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ",
        &[
            Token::Word("Δέλτα"),
            Token::Word("réflexion"),
            Token::Word("Кызыл"),
            Token::Word("𐰓𐰏𐰇"),
            Token::Word("朝焼け"),
            Token::Word("سلام"),
            Token::Word("검정"),
            Token::Word("שָׁלוֹם"),
            Token::Word("गुलाबी"),
            Token::Word("փիրուզ"),
        ],
    );
    sub_test("æNoø", &[Token::Word("æNoø")]);
    sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]);
    sub_test("No好", &[Token::Word("No好")]);
    sub_test("_No", &[Token::Word("_No")]);

    sub_test_with_and_without_doc_comments(
        "*/*/***/*//=/*****//",
        &[
            Token::Operation('*'),
            Token::AssignmentOperation('/'),
            Token::DocComment("/*****/"),
            Token::Operation('/'),
        ],
    );

    // Type suffixes are only allowed on hex float literals
    // if you provided an exponent.
    sub_test(
        "0x1.2f 0x1.2f 0x1.2h 0x1.2H 0x1.2lf",
        &[
            // The 'f' suffixes are taken as a hex digit:
            // the fractional part is 0x2f / 256.
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("h"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("H"),
            Token::Number(Ok(Number::AbstractFloat(1.125))),
            Token::Word("lf"),
        ],
    )
}
881
#[test]
fn test_variable_decl() {
    // Whitespace between tokens is irrelevant, and the `<`/`>` surrounding
    // address spaces and type parameters lex as `Paren` tokens here.
    let cases: &[(&str, &[Token])] = &[
        (
            "@group(0 ) var< uniform> texture:   texture_multisampled_2d <f32 >;",
            &[
                Token::Attribute,
                Token::Word("group"),
                Token::Paren('('),
                Token::Number(Ok(Number::AbstractInt(0))),
                Token::Paren(')'),
                Token::Word("var"),
                Token::Paren('<'),
                Token::Word("uniform"),
                Token::Paren('>'),
                Token::Word("texture"),
                Token::Separator(':'),
                Token::Word("texture_multisampled_2d"),
                Token::Paren('<'),
                Token::Word("f32"),
                Token::Paren('>'),
                Token::Separator(';'),
            ],
        ),
        (
            "var<storage,read_write> buffer: array<u32>;",
            &[
                Token::Word("var"),
                Token::Paren('<'),
                Token::Word("storage"),
                Token::Separator(','),
                Token::Word("read_write"),
                Token::Paren('>'),
                Token::Word("buffer"),
                Token::Separator(':'),
                Token::Word("array"),
                Token::Paren('<'),
                Token::Word("u32"),
                Token::Paren('>'),
                Token::Separator(';'),
            ],
        ),
    ];
    for &(input, expected) in cases {
        sub_test(input, expected);
    }
}
924
#[test]
fn test_comments() {
    // Plain (non-doc) comments are trivia: each input lexes to no tokens.
    let comment_only_inputs = [
        "// Single comment",
        "/* multi
    line
    comment */",
        "/* multi
    line
    comment */
    // and another",
    ];
    for &input in comment_only_inputs.iter() {
        sub_test(input, &[]);
    }
}
943
#[test]
fn test_doc_comments() {
    // A doc comment token carries the complete comment text, delimiters
    // included, so for a comment-only input the token text equals the input.
    let single = "/// Single comment";
    sub_test_with_and_without_doc_comments(single, &[Token::DocComment(single)]);

    let multi = "/** multi
    line
    comment */";
    sub_test_with_and_without_doc_comments(multi, &[Token::DocComment(multi)]);

    // Consecutive doc comments lex as separate tokens.
    sub_test_with_and_without_doc_comments(
        "/** multi
    line
    comment */
    /// and another",
        &[
            Token::DocComment(multi),
            Token::DocComment("/// and another"),
        ],
    );
}
976
#[test]
fn test_doc_comment_nested() {
    // Block doc comments nest: the outer `/**` is only closed by the second
    // `*/`, so the whole construct is captured as a single `DocComment` token
    // and lexing resumes at `const`.
    sub_test_with_and_without_doc_comments(
        "/**
    a comment with nested one /**
        nested comment
    */
    */
    const a : i32 = 2;",
        &[
            // The token text is the full comment, inner comment included.
            Token::DocComment(
                "/**
    a comment with nested one /**
        nested comment
    */
    */",
            ),
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1004
#[test]
fn test_doc_comment_long_character() {
    // Multi-byte characters (Greek letters, math alphanumerics, box-drawing
    // runs) must not confuse the lexer's offsets. Each `///` token starts at
    // the `///` itself, so the indentation preceding it is not part of the
    // token text.
    sub_test_with_and_without_doc_comments(
        "/// π/2
        ///     D(𝐡) = ───────────────────────────────────────────────────
///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`
    const a : i32 = 2;",
        &[
            Token::DocComment("/// π/2"),
            Token::DocComment("///     D(𝐡) = ───────────────────────────────────────────────────"),
            Token::DocComment("///            παₜα_b((𝐡 ⋅ 𝐭)² / αₜ²) + (𝐡 ⋅ 𝐛)² / α_b² +`"),
            Token::Word("const"),
            Token::Word("a"),
            Token::Separator(':'),
            Token::Word("i32"),
            Token::Operation('='),
            Token::Number(Ok(Number::AbstractInt(2))),
            Token::Separator(';'),
        ],
    );
}
1026
#[test]
fn test_doc_comments_module() {
    // Module doc comments (`//!` and `/*! ... */`) are collected from the top
    // of the module. Regular comments, `///` doc comments, and blank lines may
    // be interleaved without ending the run, but after the first real token
    // (`const`) a later `//!` line yields nothing.
    let expected = [
        Token::ModuleDocComment("//! Comment Module"),
        Token::ModuleDocComment("//! Another one."),
        Token::ModuleDocComment("/*! Different module comment */"),
        // The `///` line lexes as an ordinary doc comment token instead.
        Token::DocComment("/// Trying to break module comment"),
        Token::ModuleDocComment("//! After a regular comment is ok."),
        Token::ModuleDocComment("/*! Different module comment again */"),
        Token::ModuleDocComment("//! After a break is supported."),
        Token::Word("const"),
    ];
    sub_test_with_and_without_doc_comments(
        "//! Comment Module
        //! Another one.
        /*! Different module comment */
        /// Trying to break module comment
        // Trying to break module comment again
        //! After a regular comment is ok.
        /*! Different module comment again */

        //! After a break is supported.
        const
        //! After anything else is not.",
        &expected,
    );
}