Skip to content

Commit

Permalink
Merge pull request #28320 from ProvableHQ/fix/doc-parser
Browse files Browse the repository at this point in the history
Update and improve some parser documentation.
  • Loading branch information
d0cd authored Aug 14, 2024
2 parents e55c219 + 5e1d347 commit 9d247cf
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 34 deletions.
64 changes: 42 additions & 22 deletions compiler/parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,22 +400,22 @@ impl<N: Network> ParserContext<'_, N> {
/// Returns an [`Expression`] AST node if the next tokens represent a
/// static access expression.
fn parse_associated_access_expression(&mut self, module_name: Expression) -> Result<Expression> {
// Parse struct name expression into struct type.
// Ensure that the preceding expression is an identifier (a named type).
let variant = if let Expression::Identifier(ident) = module_name {
ident
} else {
return Err(ParserError::invalid_associated_access(&module_name, module_name.span()).into());
};

// Parse the struct member name (can be variable or function name).
// Parse the constant or function name.
let member_name = self.expect_identifier()?;

// Check if there are arguments.
Ok(Expression::Access(if self.check(&Token::LeftParen) {
// Parse the arguments
let (args, _, end) = self.parse_expr_tuple()?;

// Return the struct function.
// Return the associated function.
AccessExpression::AssociatedFunction(AssociatedFunction {
span: module_name.span() + end,
variant,
Expand All @@ -424,7 +424,7 @@ impl<N: Network> ParserContext<'_, N> {
id: self.node_builder.next_id(),
})
} else {
// Return the struct constant.
// Return the associated constant.
AccessExpression::AssociatedConstant(AssociatedConstant {
span: module_name.span() + member_name.span(),
ty: Type::Identifier(variant),
Expand All @@ -439,15 +439,20 @@ impl<N: Network> ParserContext<'_, N> {
self.parse_paren_comma_list(|p| p.parse_expression().map(Some))
}

// Parses an external function call `credits.aleo/transfer()` or locator `token.aleo/accounts`.
/// Parses an external function call `credits.aleo/transfer()` or locator `token.aleo/accounts`.
///
/// In the ABNF grammar,
/// an external function call is one of the two kinds of free function calls,
/// namely the one that uses a locator to designate the function;
/// a locator is a kind of primary expression.
fn parse_external_resource(&mut self, expr: Expression, network_span: Span) -> Result<Expression> {
// Parse `/`.
self.expect(&Token::Div)?;

// Parse name.
let name = self.expect_identifier()?;

// Parse the parent program identifier.
// Ensure the preceding expression is a (program) identifier.
let program: Identifier = match expr {
Expression::Identifier(identifier) => identifier,
_ => unreachable!("Function called must be preceded by a program identifier."),
Expand Down Expand Up @@ -485,13 +490,17 @@ impl<N: Network> ParserContext<'_, N> {
}

/// Returns an [`Expression`] AST node if the next tokens represent an
/// array access, struct member access, function call, or static function call expression.
/// array access, struct member access, tuple access, or method call expression.
///
/// Otherwise, tries to parse the next token using [`Self::parse_primary_expression`].
/// Note that, as mentioned in [`Self::parse_primary_expression`],
/// this function also completes the parsing of some primary expressions
/// (as defined in the ABNF grammar),
/// which [`Self::parse_primary_expression`] only starts to parse.
fn parse_postfix_expression(&mut self) -> Result<Expression> {
// We don't directly parse named-type's and Identifier's here as
// the ABNF states. Rather the primary expression already
// handle those. The ABNF is more specific for language reasons.
// We don't directly parse named types and identifiers in associated constants and functions
// here as the ABNF states. Rather, those named types and identifiers are parsed
// as primary expressions, and combined to form associated constants and functions here.
let mut expr = self.parse_primary_expression()?;
loop {
if self.eat(&Token::Dot) {
Expand Down Expand Up @@ -553,7 +562,7 @@ impl<N: Network> ParserContext<'_, N> {
}
}
} else if self.eat(&Token::DoubleColon) {
// Eat a core struct constant or core struct function call.
// Eat a core associated constant or core associated function call.
expr = self.parse_associated_access_expression(expr)?;
} else if self.eat(&Token::LeftSquare) {
// Eat an array access.
Expand Down Expand Up @@ -581,16 +590,17 @@ impl<N: Network> ParserContext<'_, N> {
id: self.node_builder.next_id(),
});
}
// Check if next token is a dot to see if we are calling recursive method.
// Stop parsing the postfix expression unless a dot or square bracket follows.
if !(self.check(&Token::Dot) || self.check(&Token::LeftSquare)) {
break;
}
}
Ok(expr)
}

/// Returns an [`Expression`] AST node if the next tokens represent a
/// tuple initialization expression or an affine group literal.
/// Returns an [`Expression`] AST node if the next tokens represent
/// a parenthesized expression or a unit expression
/// or a tuple initialization expression or an affine group literal.
fn parse_tuple_expression(&mut self) -> Result<Expression> {
if let Some(gt) = self.eat_group_partial().transpose()? {
return Ok(Expression::Literal(Literal::Group(Box::new(GroupLiteral::Tuple(gt)))));
Expand Down Expand Up @@ -643,14 +653,14 @@ impl<N: Network> ParserContext<'_, N> {
Some(gc)
}

/// Removes the next two tokens if they are a pair of [`GroupCoordinate`] and returns them,
/// or [None] if the next token is not a [`GroupCoordinate`].
/// Attempts to parse an affine group literal, if present.
/// If absent, returns [`None`].
fn eat_group_partial(&mut self) -> Option<Result<GroupTuple>> {
assert!(self.check(&Token::LeftParen)); // `(`.

// Peek at first gc.
// Peek at first group coordinate.
let start_span = &self.token.span;
let mut dist = 1; // 0th is `(` so 1st is first gc's start.
let mut dist = 1; // 0th is `(` so 1st is first group coordinate's start.
let first_gc = self.peek_group_coordinate(&mut dist)?;

let check_ahead = |d, token: &_| self.look_ahead(d, |t| (&t.token == token).then_some(t.span));
Expand All @@ -659,7 +669,7 @@ impl<N: Network> ParserContext<'_, N> {
check_ahead(dist, &Token::Comma)?;
dist += 1; // Standing at `,` so advance one for next gc's start.

// Peek at second gc.
// Peek at second group coordinate.
let second_gc = self.peek_group_coordinate(&mut dist)?;

// Peek at `)`.
Expand All @@ -678,6 +688,7 @@ impl<N: Network> ParserContext<'_, N> {
self.bump();
}

// Ensure that the ending `)` and `group` are treated as one token `)group`, as in the ABNF grammar.
if let Err(e) = assert_no_whitespace(right_paren_span, end_span, &format!("({},{})", gt.x, gt.y), "group") {
return Some(Err(e));
}
Expand Down Expand Up @@ -716,10 +727,19 @@ impl<N: Network> ParserContext<'_, N> {
}

/// Returns an [`Expression`] AST node if the next token is a primary expression:
/// - Literals: field, group, unsigned integer, signed integer, boolean, address
/// - Aggregate types: array, tuple
/// - Literals: field, group, unsigned integer, signed integer, boolean, address, string
/// - Aggregate type constructors: array, tuple, struct
/// - Identifiers: variables, keywords
/// - self
///
/// This function only parses some of the primary expressions defined in the ABNF grammar;
/// for the others, it parses their initial parts,
/// leaving it to the [`Self::parse_postfix_expression`] function to complete the parsing.
/// For example, of the primary expression `u8::c`, this function only parses the `u8` part,
/// leaving it to [`Self::parse_postfix_expression`] to parse the `::c` part.
/// So technically the expression returned by this function may not quite be
/// an expression as defined in the ABNF grammar,
/// but it is only a temporary expression that is combined into a larger one
/// by [`Self::parse_postfix_expression`], yielding an actual expression according to the grammar.
///
/// Returns an expression error if the token cannot be matched.
fn parse_primary_expression(&mut self) -> Result<Expression> {
Expand Down
6 changes: 3 additions & 3 deletions compiler/parser/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ const ASSIGN_TOKENS: &[Token] = &[
Token::AndAssign,
Token::BitAndAssign,
Token::BitOrAssign,
Token::BitXorAssign,
Token::ShrAssign,
Token::ShlAssign,
Token::BitXorAssign,
];

impl<N: Network> ParserContext<'_, N> {
Expand All @@ -52,7 +52,7 @@ impl<N: Network> ParserContext<'_, N> {
}
}

/// Returns a [`AssertStatement`] AST node if the next tokens represent an assertion statement.
/// Returns an [`AssertStatement`] AST node if the next tokens represent an assertion statement.
fn parse_assert_statement(&mut self) -> Result<Statement> {
// Check which variant of the assert statement is being used.
// Note that `parse_assert_statement` is called only if the next token is an assertion token.
Expand Down Expand Up @@ -85,7 +85,7 @@ impl<N: Network> ParserContext<'_, N> {
Ok(Statement::Assert(AssertStatement { variant, span, id: self.node_builder.next_id() }))
}

/// Returns a [`AssignStatement`] AST node if the next tokens represent a assign, otherwise expects an expression statement.
/// Returns an [`AssignStatement`] AST node if the next tokens represent an assignment, otherwise expects an expression statement.
fn parse_assign_statement(&mut self) -> Result<Statement> {
let place = self.parse_expression()?;

Expand Down
5 changes: 4 additions & 1 deletion compiler/parser/src/parser/type_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ impl<N: Network> ParserContext<'_, N> {

/// Returns a `(Type, Span)` pair if the next token represents a primitive type:
/// the parsed [`Type`] together with the span of the parsed token.
///
/// These correspond to what the ABNF grammar calls 'named primitive types';
/// the 'primitive types' according to the ABNF grammar include also the unit type.
pub fn parse_primitive_type(&mut self) -> Result<(Type, Span)> {
let span = self.expect_any(TYPE_TOKENS)?;
Ok((
Expand Down Expand Up @@ -138,7 +141,7 @@ impl<N: Network> ParserContext<'_, N> {
// Expect the sequence `<`, `Fn`.
self.expect(&Token::Lt)?;
self.expect(&Token::Fn)?;
// Parse the parenthesis list of function arguments.
// Parse the parenthesized list of function arguments.
let (types, _, full_span) = self.parse_paren_comma_list(|p| p.parse_type().map(Some))?;
// Expect the closing `>`.
self.expect(&Token::Gt)?;
Expand Down
17 changes: 9 additions & 8 deletions compiler/parser/src/tokenizer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ pub enum Token {
// The numeric literals in the ABNF grammar, which consist of numerals followed by types,
// are represented not as single tokens here,
// but as two separate tokens (one for the numeral and one for the type),
// enforcing, during parsing, the absence of whitespace or comments between those two tokens.
// enforcing, during parsing, the absence of whitespace or comments between those two tokens
// (see the parse_primary_expression function).

// Identifiers
Identifier(Symbol),
Expand Down Expand Up @@ -104,7 +105,7 @@ pub enum Token {
Underscore,
At, // @ is not a symbol token in the ABNF grammar (see explanation about annotations below)
// There is no symbol for `)group` here (unlike the ABNF grammar),
// because we handle that differently in the lexer.
// because we handle that differently in the parser: see the eat_group_partial function.

// The ABNF grammar has annotations as tokens,
// defined as @ immediately followed by an identifier.
Expand Down Expand Up @@ -136,6 +137,7 @@ pub enum Token {
U128,

// Other keywords
Aleo,
As,
Assert,
AssertEq,
Expand All @@ -156,6 +158,7 @@ pub enum Token {
Inline,
Let,
Mapping,
Network,
Private,
Program,
Public,
Expand All @@ -164,10 +167,8 @@ pub enum Token {
Transition,

// Meta tokens
Aleo,
Eof,
Leo,
Network,
Eof, // used to signal end-of-file, not an actual token of the language
Leo, // only used for error messages, not an actual keyword
}

/// Represents all valid Leo keyword tokens.
Expand Down Expand Up @@ -376,6 +377,7 @@ impl fmt::Display for Token {
U64 => write!(f, "u64"),
U128 => write!(f, "u128"),

Aleo => write!(f, "aleo"),
As => write!(f, "as"),
Assert => write!(f, "assert"),
AssertEq => write!(f, "assert_eq"),
Expand All @@ -396,17 +398,16 @@ impl fmt::Display for Token {
Inline => write!(f, "inline"),
Let => write!(f, "let"),
Mapping => write!(f, "mapping"),
Network => write!(f, "network"),
Private => write!(f, "private"),
Program => write!(f, "program"),
Public => write!(f, "public"),
Return => write!(f, "return"),
SelfLower => write!(f, "self"),
Transition => write!(f, "transition"),

Aleo => write!(f, "aleo"),
Eof => write!(f, "<eof>"),
Leo => write!(f, "leo"),
Network => write!(f, "network"),
}
}
}
Expand Down

0 comments on commit 9d247cf

Please sign in to comment.