Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Streamline lexer plumbing and simplify error recovery wrt that #620

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 51 additions & 84 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ pub trait ParserDefinitionNodeExtensions {

impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
fn to_parser_code(&self, context_name: &'static str, is_trivia: bool) -> TokenStream {
let context = format_ident!("{context_name}");
let lex_ctx = quote! { LexicalContextType::#context };

match self {
Self::Versioned(body, _, _) => body.to_parser_code(context_name, is_trivia),

Expand Down Expand Up @@ -102,91 +105,71 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {

Self::ScannerDefinition(scanner_definition, _) => {
let kind = format_ident!("{name}", name = scanner_definition.name());
if is_trivia {
let function_name =
format_ident!("{}_parse_token", context_name.to_snake_case());
quote! {
self.#function_name(input, TokenKind::#kind)
}

let parse_token = if is_trivia {
format_ident!("parse_token")
} else {
let function_name =
format_ident!("{}_parse_token_with_trivia", context_name.to_snake_case());
quote! {
self.#function_name(input, TokenKind::#kind)
}
format_ident!("parse_token_with_trivia")
};

quote! {
self.#parse_token::<#lex_ctx>(input, TokenKind::#kind)
}
}

Self::TriviaParserDefinition(trivia_parser_definition, _) => {
let function_name = format_ident!(
"{snake_case}",
snake_case = trivia_parser_definition.name().to_snake_case()
);
let function_name =
format_ident!("{}", trivia_parser_definition.name().to_snake_case());

quote! { self.#function_name(input) }
}

Self::ParserDefinition(parser_definition, _) => {
if is_trivia {
unreachable!(
"Trivia productions can only reference trivia or token productions"
)
}
assert!(
OmarTawfik marked this conversation as resolved.
Show resolved Hide resolved
!is_trivia,
"Trivia productions can only reference trivia or token productions"
);

if parser_definition.is_inline() {
parser_definition.to_parser_code()
} else {
let function_name = format_ident!(
"{snake_case}",
snake_case = parser_definition.name().to_snake_case()
);
let function_name =
format_ident!("{}", parser_definition.name().to_snake_case());

quote! {
self.#function_name(input)
}
}
}

Self::PrecedenceParserDefinition(precedence_parser_definition, _) => {
if is_trivia {
unreachable!(
"Trivia productions can only reference trivia or token productions"
)
}
let function_name = format_ident!(
"{snake_case}",
snake_case = precedence_parser_definition.name().to_snake_case()
assert!(
!is_trivia,
"Trivia productions can only reference trivia or token productions"
);

let function_name =
format_ident!("{}", precedence_parser_definition.name().to_snake_case());

quote! { self.#function_name(input) }
}

Self::DelimitedBy(open, body, close, _) => {
let [open_token, close_token] = match (open.as_ref(), close.as_ref()) {
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
(
ParserDefinitionNode::ScannerDefinition(open, ..),
ParserDefinitionNode::ScannerDefinition(close, ..),
) => [open, close].map(|scanner| format_ident!("{}", scanner.name())),
_ => unreachable!("Only tokens are permitted as delimiters"),
};

let parse_token = format_ident!(
"{context_name}_parse_token_with_trivia",
context_name = context_name.to_snake_case()
);

let delimiters = format_ident!(
"{context_name}_delimiters",
context_name = context_name.to_snake_case()
);

let context = format_ident!("{context_name}");

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
quote! {
seq.elem(#parser
.recover_until_with_nested_delims(input,
|input| Lexer::next_token::<{ LexicalContext::#context as u8 }>(self, input),
|input| Lexer::leading_trivia(self, input),
TokenKind::#close_token,
Self::#delimiters(),
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#close_delim,
RecoverFromNoMatch::Yes,
)
)?;
Expand All @@ -196,67 +179,51 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {

quote! {
SequenceHelper::run(|mut seq| {
let mut delim_guard = input.open_delim(TokenKind::#close_token);
let mut delim_guard = input.open_delim(TokenKind::#close_delim);
let input = delim_guard.ctx();

seq.elem(self.#parse_token(input, TokenKind::#open_token))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#open_delim))?;
#body_parser
seq.elem(self.#parse_token(input, TokenKind::#close_token))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#close_delim))?;
seq.finish()
})
}
}

Self::SeparatedBy(body, separator, _) => {
let separator_scanner = match separator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => scanner,
let separator = match separator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
}
_ => unreachable!("Only tokens are permitted as separators"),
};

let separator_token_kind = format_ident!("{name}", name = separator_scanner.name());
let context = format_ident!("{context_name}");

let parser = body.to_parser_code(context_name, is_trivia);

quote! {
SeparatedHelper::run::<{ LexicalContext::#context as u8}, Self>(
SeparatedHelper::run::<_, #lex_ctx>(
input,
|input| #parser,
TokenKind::#separator_token_kind,
self,
|input| #parser,
TokenKind::#separator,
)
}
}
Self::TerminatedBy(body, terminator, _) => {
let terminator_scanner = match terminator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => scanner,
let terminator = match terminator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
}
_ => unreachable!("Only tokens are permitted as terminators"),
};

let terminator_token_kind =
format_ident!("{name}", name = terminator_scanner.name());

let context = format_ident!("{context_name}");

let delimiters = format_ident!(
"{context_name}_delimiters",
context_name = context_name.to_snake_case()
);

let parse_token = format_ident!(
"{context_name}_parse_token_with_trivia",
context_name = context_name.to_snake_case()
);

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
quote! {
seq.elem(#parser
.recover_until_with_nested_delims(input,
|input| Lexer::next_token::<{ LexicalContext::#context as u8 }>(self, input),
|input| Lexer::leading_trivia(self, input),
TokenKind::#terminator_token_kind,
Self::#delimiters(),
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
RecoverFromNoMatch::No,
)
)?;
Expand All @@ -267,7 +234,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! {
SequenceHelper::run(|mut seq| {
#body_parser
seq.elem(self.#parse_token(input, TokenKind::#terminator_token_kind))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#terminator))?;
seq.finish()
})
}
Expand Down
10 changes: 9 additions & 1 deletion crates/codegen/parser/runtime/src/kinds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,15 @@ pub enum ProductionKind {
#[derive(strum_macros::FromRepr)]
#[cfg_attr(feature = "slang_napi_interfaces", /* derives `Clone` and `Copy` */ napi(string_enum, namespace = "language"))]
#[cfg_attr(not(feature = "slang_napi_interfaces"), derive(Clone, Copy))]
#[repr(u8)] // This is used as a const fn argument, which only supports primitive types
pub enum LexicalContext {
XXX,
}

/// Marker trait for type-level [`LexicalContext`] variants.
pub trait IsLexicalContext {
/// Returns a run-time [`LexicalContext`] value.
fn value() -> LexicalContext;
}

#[allow(non_snake_case)]
pub mod LexicalContextType {}
43 changes: 28 additions & 15 deletions crates/codegen/parser/runtime/src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,52 @@
use crate::{
cst,
kinds::{LexicalContext, TokenKind},
kinds::{IsLexicalContext, TokenKind},
support::{ParserContext, ParserResult},
};

// Ensure that the `LexicalContext` enum is `repr(u8)`.
// Workaround until repr(u8) enums can be used as const params.
const _ASSERT_CONTEXT_IS_REPR_U8: fn() = || {
let _ = core::mem::transmute::<u8, LexicalContext>;
};

pub trait Lexer {
// Generated by the templating engine
#[doc(hidden)]
fn next_token<const LEX_CTX: u8>(&self, input: &mut ParserContext) -> Option<TokenKind>;
fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext) -> Option<TokenKind>;
// NOTE: These are context-insensitive
#[doc(hidden)]
fn leading_trivia(&self, input: &mut ParserContext) -> ParserResult;
#[doc(hidden)]
fn trailing_trivia(&self, input: &mut ParserContext) -> ParserResult;
#[doc(hidden)]
fn delimiters<const LEX_CTX: u8>() -> &'static [(TokenKind, TokenKind)];
/// Returns valid grouping delimiters in the given lexical context.
fn delimiters<LexCtx: IsLexicalContext>() -> &'static [(TokenKind, TokenKind)];

fn peek_token<const LEX_CTX: u8>(&self, input: &mut ParserContext) -> Option<TokenKind> {
/// Peeks the next token, including trivia. Does not advance the input.
fn peek_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext) -> Option<TokenKind> {
let start = input.position();
let token = self.next_token::<LEX_CTX>(input);
let token = self.next_token::<LexCtx>(input);
input.set_position(start);
token
}

/// Peeks the next significant (i.e. non-trivia) token. Does not advance the input.
fn peek_token_with_trivia<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
) -> Option<TokenKind> {
let start = input.position();

let _ = self.leading_trivia(input);
let token = self.next_token::<LexCtx>(input);

input.set_position(start);
token
}

fn parse_token<const LEX_CTX: u8>(
/// Attempts to consume the next expected token. Advances the input only if the token matches.
fn parse_token<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
kind: TokenKind,
) -> ParserResult {
let start = input.position();
if self.next_token::<LEX_CTX>(input) != Some(kind) {
if self.next_token::<LexCtx>(input) != Some(kind) {
input.set_position(start);
return ParserResult::no_match(vec![kind]);
}
Expand All @@ -47,7 +58,9 @@ pub trait Lexer {
)
}

fn parse_token_with_trivia<const LEX_CTX: u8>(
/// Attempts to consume the next significant token including both leading and trailing trivia.
/// Advances the input only if the token matches.
fn parse_token_with_trivia<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
kind: TokenKind,
Expand All @@ -62,7 +75,7 @@ pub trait Lexer {
}

let start = input.position();
if self.next_token::<LEX_CTX>(input) != Some(kind) {
if self.next_token::<LexCtx>(input) != Some(kind) {
input.set_position(restore);
return ParserResult::no_match(vec![kind]);
}
Expand Down
Loading