// fe_parser/parser.rs

1pub use fe_common::diagnostics::Label;
2use fe_common::diagnostics::{Diagnostic, Severity};
3use fe_common::files::SourceFileId;
4
5use crate::lexer::{Lexer, Token, TokenKind};
6use crate::node::Span;
7use std::{error, fmt};
8
/// Marker error type indicating that parsing failed. The actual error
/// details are emitted as diagnostics; this type only signals failure.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub struct ParseFailed;

impl fmt::Display for ParseFailed {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        f.write_str("ParseFailed")
    }
}

impl error::Error for ParseFailed {}

/// The result type used by all parsing functions.
pub type ParseResult<T> = Result<T, ParseFailed>;
19
/// `Parser` maintains the parsing state, such as the token stream,
/// "enclosure" (paren, brace, ..) stack, diagnostics, etc.
/// Syntax parsing logic is in the [`crate::grammar`] module.
///
/// See [`BTParser`] if you need backtrackable parser.
pub struct Parser<'a> {
    pub file_id: SourceFileId,
    lexer: Lexer<'a>,

    /// Tokens that have been "peeked", or split from a larger token.
    /// Eg. `>>` may be split into two `>` tokens when parsing the end of a
    /// generic type parameter list (eg. `Map<u256, Map<u256, address>>`).
    ///
    /// Used as a LIFO stack: tokens are pushed/popped at the end, so the
    /// last element is the next token to be returned.
    buffered: Vec<Token<'a>>,

    /// Currently-open enclosures, innermost last.
    enclosure_stack: Vec<Enclosure>,

    /// The diagnostics (errors and warnings) emitted during parsing.
    pub diagnostics: Vec<Diagnostic>,
}
39
impl<'a> Parser<'a> {
    /// Create a new parser for a source code string and associated file id.
    pub fn new(file_id: SourceFileId, content: &'a str) -> Self {
        Parser {
            file_id,
            lexer: Lexer::new(file_id, content),
            buffered: vec![],
            enclosure_stack: vec![],
            diagnostics: vec![],
        }
    }

    /// Returns back tracking parser.
    pub fn as_bt_parser<'b>(&'b mut self) -> BTParser<'a, 'b> {
        BTParser::new(self)
    }

    /// Return the next token, or an error if we've reached the end of the file.
    ///
    /// Also maintains the enclosure stack: opening tokens (`(`, `[`, `{`) are
    /// pushed as non-block enclosures, and closing tokens pop the stack.
    /// (A block `{` is instead consumed by [`Parser::enter_block`], which
    /// bypasses this fn via `next_raw`.)
    #[allow(clippy::should_implement_trait)] // next() is a nice short name for a common task
    pub fn next(&mut self) -> ParseResult<Token<'a>> {
        self.eat_newlines_if_in_nonblock_enclosure();
        if let Some(tok) = self.next_raw() {
            if is_enclosure_open(tok.kind) {
                self.enclosure_stack
                    .push(Enclosure::non_block(tok.kind, tok.span));
            } else if is_enclosure_close(tok.kind) {
                if let Some(open) = self.enclosure_stack.pop() {
                    if !enclosure_tokens_match(open.token_kind, tok.kind) {
                        // TODO: we should search the enclosure_stack
                        //  for the last matching enclosure open token.
                        //  If any enclosures are unclosed, we should emit an
                        //  error, and somehow close their respective ast nodes.
                        //  We could synthesize the correct close token, or emit
                        //  a special TokenKind::UnclosedEnclosure or whatever.
                        todo!()
                    }
                } else {
                    // A closing token with no opener on the stack, eg. a stray `)`.
                    self.error(tok.span, format!("Unmatched `{}`", tok.text));
                }
            }
            Ok(tok)
        } else {
            // Both the buffer and the lexer are exhausted; report eof with a
            // zero-width span at the very end of the source text.
            self.error(
                Span::new(
                    self.file_id,
                    self.lexer.source().len(),
                    self.lexer.source().len(),
                ),
                "unexpected end of file",
            );
            Err(ParseFailed)
        }
    }

    /// Return the next token from the buffer (most recently buffered first),
    /// falling back to the lexer. Performs no newline or enclosure handling.
    fn next_raw(&mut self) -> Option<Token<'a>> {
        self.buffered.pop().or_else(|| self.lexer.next())
    }

    /// Take a peek at the next token kind without consuming it, or return an
    /// error if we've reached the end of the file.
    pub fn peek_or_err(&mut self) -> ParseResult<TokenKind> {
        self.eat_newlines_if_in_nonblock_enclosure();
        if let Some(tk) = self.peek_raw() {
            Ok(tk)
        } else {
            let index = self.lexer.source().len();
            self.error(
                Span::new(self.file_id, index, index),
                "unexpected end of file",
            );
            Err(ParseFailed)
        }
    }

    /// Take a peek at the next token kind. Returns `None` if we've reached the
    /// end of the file.
    pub fn peek(&mut self) -> Option<TokenKind> {
        self.eat_newlines_if_in_nonblock_enclosure();
        self.peek_raw()
    }

    /// Peek at the next token kind, pulling a token from the lexer into the
    /// buffer if needed. Performs no newline or enclosure handling.
    fn peek_raw(&mut self) -> Option<TokenKind> {
        if self.buffered.is_empty() {
            if let Some(tok) = self.lexer.next() {
                self.buffered.push(tok);
            } else {
                return None;
            }
        }
        Some(self.buffered.last().unwrap().kind)
    }

    /// Skip newlines when the innermost open enclosure is a non-block
    /// (eg. inside parens or brackets), where newlines are insignificant.
    fn eat_newlines_if_in_nonblock_enclosure(&mut self) {
        // TODO: allow newlines inside angle brackets?
        //   eg `fn f(x: map\n <\n u8\n, ...`
        if let Some(enc) = self.enclosure_stack.last() {
            if !enc.is_block {
                self.eat_newlines();
            }
        }
    }

    /// Split the next token into two tokens, returning the first. Only supports
    /// splitting the `>>` token into two `>` tokens, specifically for
    /// parsing the closing angle bracket of a generic type argument list
    /// (`Map<x, Map<y, z>>`).
    ///
    /// # Panics
    /// Panics if the next token isn't `>>`
    pub fn split_next(&mut self) -> ParseResult<Token<'a>> {
        let gtgt = self.next()?;
        assert_eq!(gtgt.kind, TokenKind::GtGt);

        let (gt1, gt2) = gtgt.text.split_at(1);
        // Buffer the second `>`, spanning the second half of the `>>` span;
        // it will be returned by the next call to `next_raw`.
        self.buffered.push(Token {
            kind: TokenKind::Gt,
            text: gt2,
            span: Span::new(self.file_id, gtgt.span.start + 1, gtgt.span.end),
        });

        Ok(Token {
            kind: TokenKind::Gt,
            text: gt1,
            span: Span::new(self.file_id, gtgt.span.start, gtgt.span.end - 1),
        })
    }

    /// Returns `true` if the parser has reached the end of the file.
    pub fn done(&mut self) -> bool {
        self.peek_raw().is_none()
    }

    /// Consume any consecutive newline tokens.
    pub fn eat_newlines(&mut self) {
        while self.peek_raw() == Some(TokenKind::Newline) {
            self.next_raw();
        }
    }

    /// Assert that the next token matches the expected token
    /// kind, and return it. This should be used in cases where the next token
    /// kind is expected to have been checked already.
    ///
    /// # Panics
    /// Panics if the next token kind isn't `tk`.
    pub fn assert(&mut self, tk: TokenKind) -> Token<'a> {
        let tok = self.next().unwrap();
        assert_eq!(tok.kind, tk, "internal parser error");
        tok
    }

    /// If the next token matches the expected kind, return it. Otherwise emit
    /// an error diagnostic with the given message and return an error.
    pub fn expect<S: Into<String>>(
        &mut self,
        expected: TokenKind,
        message: S,
    ) -> ParseResult<Token<'a>> {
        self.expect_with_notes(expected, message, |_| Vec::new())
    }

    /// Like [`Parser::expect`], but with additional notes to be appended to the
    /// bottom of the diagnostic message. The notes are provided by a
    /// function that returns a `Vec<String>`, to avoid allocations in the
    /// case where the token is as expected.
    pub fn expect_with_notes<Str, NotesFn>(
        &mut self,
        expected: TokenKind,
        message: Str,
        notes_fn: NotesFn,
    ) -> ParseResult<Token<'a>>
    where
        Str: Into<String>,
        NotesFn: FnOnce(&Token) -> Vec<String>,
    {
        let tok = self.next()?;
        if tok.kind == expected {
            Ok(tok)
        } else {
            self.fancy_error(
                message.into(),
                vec![Label::primary(
                    tok.span,
                    format!(
                        "expected {}, found {}",
                        expected.describe(),
                        tok.kind.describe()
                    ),
                )],
                notes_fn(&tok),
            );
            Err(ParseFailed)
        }
    }

    /// If the next token matches the expected kind, return it. Otherwise return
    /// None.
    pub fn optional(&mut self, kind: TokenKind) -> Option<Token<'a>> {
        if self.peek() == Some(kind) {
            Some(self.next().unwrap())
        } else {
            None
        }
    }

    /// Emit an "unexpected token" error diagnostic with the given message.
    pub fn unexpected_token_error<S: Into<String>>(
        &mut self,
        tok: &Token,
        message: S,
        notes: Vec<String>,
    ) {
        self.fancy_error(
            message,
            vec![Label::primary(tok.span, "unexpected token")],
            notes,
        );
    }

    /// Enter a "block", which is a brace-enclosed list of statements,
    /// separated by newlines and/or semicolons.
    /// This checks for and consumes the `{` that precedes the block.
    pub fn enter_block(&mut self, context_span: Span, context_name: &str) -> ParseResult<()> {
        // Uses `next_raw` so the `{` isn't pushed as a non-block enclosure
        // by `next`; it's pushed as a block enclosure here instead.
        if self.peek_raw() == Some(TokenKind::BraceOpen) {
            let tok = self.next_raw().unwrap();
            self.enclosure_stack.push(Enclosure::block(tok.span));
            self.eat_newlines();
            Ok(())
        } else {
            self.fancy_error(
                format!("{context_name} must start with `{{`"),
                vec![Label::primary(
                    Span::new(self.file_id, context_span.end, context_span.end),
                    "expected `{` here",
                )],
                vec![],
            );
            Err(ParseFailed)
        }
    }

    /// Consumes newlines and semicolons. Returns Ok if one or more newlines or
    /// semicolons are consumed, or if the next token is a `}`.
    pub fn expect_stmt_end(&mut self, context_name: &str) -> ParseResult<()> {
        let mut newline = false;
        while matches!(self.peek_raw(), Some(TokenKind::Newline | TokenKind::Semi)) {
            newline = true;
            self.next_raw().unwrap();
        }
        if newline {
            return Ok(());
        }
        match self.peek_raw() {
            Some(TokenKind::BraceClose) => Ok(()),
            Some(_) => {
                let tok = self.next()?;
                self.unexpected_token_error(
                    &tok,
                    format!("unexpected token while parsing {context_name}"),
                    vec![format!(
                        "expected a newline; found {} instead",
                        tok.kind.describe()
                    )],
                );
                Err(ParseFailed)
            }
            None => Ok(()), // unexpected eof error will be generated by the parent block
        }
    }

    /// Emit an error diagnostic, but don't stop parsing
    pub fn error<S: Into<String>>(&mut self, span: Span, message: S) {
        self.diagnostics.push(Diagnostic {
            severity: Severity::Error,
            message: message.into(),
            labels: vec![Label::primary(span, "")],
            notes: vec![],
        })
    }

    /// Emit a "fancy" error diagnostic with any number of labels and notes,
    /// but don't stop parsing.
    pub fn fancy_error<S: Into<String>>(
        &mut self,
        message: S,
        labels: Vec<Label>,
        notes: Vec<String>,
    ) {
        self.diagnostics.push(Diagnostic {
            severity: Severity::Error,
            message: message.into(),
            labels,
            notes,
        })
    }
}
335
336/// A thin wrapper that makes [`Parser`] backtrackable.
/// A thin wrapper that makes [`Parser`] backtrackable.
pub struct BTParser<'a, 'b> {
    /// The real parser; only updated when [`BTParser::accept`] is called.
    snapshot: &'b mut Parser<'a>,
    /// A clone of the parser state that speculative parsing operates on.
    parser: Parser<'a>,
}
341
342impl<'a, 'b> BTParser<'a, 'b> {
343    pub fn new(snapshot: &'b mut Parser<'a>) -> Self {
344        let parser = Parser {
345            file_id: snapshot.file_id,
346            lexer: snapshot.lexer.clone(),
347            buffered: snapshot.buffered.clone(),
348            enclosure_stack: snapshot.enclosure_stack.clone(),
349            diagnostics: Vec::new(),
350        };
351        Self { snapshot, parser }
352    }
353
354    pub fn accept(self) {
355        self.snapshot.lexer = self.parser.lexer;
356        self.snapshot.buffered = self.parser.buffered;
357        self.snapshot.enclosure_stack = self.parser.enclosure_stack;
358        self.snapshot.diagnostics.extend(self.parser.diagnostics);
359    }
360}
361
/// Deref to the speculative [`Parser`], so parsing fns can be called
/// directly on a [`BTParser`].
impl<'a, 'b> std::ops::Deref for BTParser<'a, 'b> {
    type Target = Parser<'a>;

    fn deref(&self) -> &Self::Target {
        &self.parser
    }
}
369
/// Mutable deref to the speculative [`Parser`] (see the `Deref` impl).
impl<'a, 'b> std::ops::DerefMut for BTParser<'a, 'b> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.parser
    }
}
375
/// The start position of a chunk of code enclosed by (), [], or {}.
/// (This has nothing to do with "closures".)
///
/// Note that <> isn't currently considered an enclosure, as `<` might be
/// a less-than operator, while the rest are unambiguous.
///
/// A `{}` enclosure may or may not be a block. A block contains a list of
/// statements, separated by newlines or semicolons (or both).
/// Semicolons and newlines are consumed by `par.expect_stmt_end()`.
///
/// Non-block enclosures contain zero or more expressions, maybe separated by
/// commas. When the top-most enclosure is a non-block, newlines are ignored
/// (by `par.next()`), and semicolons are just normal tokens that'll be
/// rejected by the parsing fns in grammar/.
#[derive(Clone, Debug)]
struct Enclosure {
    /// The kind of the opening token (paren, bracket, or brace).
    token_kind: TokenKind,
    _token_span: Span, // TODO: mark mismatched tokens
    /// Whether this `{}` is a block (see the type-level docs).
    is_block: bool,
}
396impl Enclosure {
397    pub fn block(token_span: Span) -> Self {
398        Self {
399            token_kind: TokenKind::BraceOpen,
400            _token_span: token_span,
401            is_block: true,
402        }
403    }
404    pub fn non_block(token_kind: TokenKind, token_span: Span) -> Self {
405        Self {
406            token_kind,
407            _token_span: token_span,
408            is_block: false,
409        }
410    }
411}
412
413fn is_enclosure_open(tk: TokenKind) -> bool {
414    use TokenKind::*;
415    matches!(tk, ParenOpen | BraceOpen | BracketOpen)
416}
417
418fn is_enclosure_close(tk: TokenKind) -> bool {
419    use TokenKind::*;
420    matches!(tk, ParenClose | BraceClose | BracketClose)
421}
422
423fn enclosure_tokens_match(open: TokenKind, close: TokenKind) -> bool {
424    use TokenKind::*;
425    matches!(
426        (open, close),
427        (ParenOpen, ParenClose) | (BraceOpen, BraceClose) | (BracketOpen, BracketClose)
428    )
429}