Skip to content

Commit

Permalink
use a HashSet for parser expectations
Browse files Browse the repository at this point in the history
Parser expectations are now stored in a `HashSet` instead of a `Vec`, which removes the many duplicates we can generate during parsing.
  • Loading branch information
OmarTawfik committed Aug 9, 2024
1 parent 30f3785 commit 6f32d0c
Show file tree
Hide file tree
Showing 33 changed files with 236 additions and 334 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions crates/codegen/runtime/cargo/src/runtime/language/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashSet;

use crate::cst::{self, Edge};
use crate::kinds::{IsLexicalContext, TerminalKind};
use crate::language::parser_support::{ParserContext, ParserResult};
Expand Down Expand Up @@ -95,7 +97,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(start);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(HashSet::from([kind]));
}
let end = input.position();

Expand All @@ -104,7 +106,7 @@ pub(crate) trait Lexer {
kind,
input.content(start.utf8..end.utf8),
))],
vec![],
HashSet::new(),
)
}

Expand All @@ -130,7 +132,7 @@ pub(crate) trait Lexer {
.is_some_and(|t| t.accepted_as(kind))
{
input.set_position(restore);
return ParserResult::no_match(vec![kind]);
return ParserResult::no_match(HashSet::from([kind]));
}
let end = input.position();
children.push(Edge::anonymous(cst::Node::terminal(
Expand All @@ -145,6 +147,6 @@ pub(crate) trait Lexer {
input.set_position(restore);
}

ParserResult::r#match(children, vec![])
ParserResult::r#match(children, HashSet::new())
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
unused_imports
)]

use std::collections::HashSet;

use semver::Version;
#[cfg(feature = "__private_napi_interfaces")]
use napi_derive::napi;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::mem;
use std::ops::ControlFlow;

Expand All @@ -24,7 +25,7 @@ pub struct ChoiceHelper {
impl ChoiceHelper {
pub fn new(input: &mut ParserContext<'_>) -> Self {
Self {
result: ParserResult::no_match(vec![]),
result: ParserResult::no_match(HashSet::new()),
start_position: input.mark(),
recovered_errors: vec![],
last_progress: input.position(),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::rc::Rc;

use metaslang_cst::TerminalKind as _;
Expand Down Expand Up @@ -92,7 +93,7 @@ where

ParserResult::SkippedUntil(SkippedUntil {
nodes, expected, ..
}) => (nodes, vec![expected]),
}) => (nodes, HashSet::from([expected])),
};

let topmost_node = match &nodes[..] {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::ops::ControlFlow;

use metaslang_cst::TerminalKind as _;
Expand All @@ -18,30 +19,33 @@ pub enum ParserResult {
impl Default for ParserResult {
fn default() -> Self {
Self::NoMatch(NoMatch {
expected_terminals: vec![],
expected_terminals: HashSet::new(),
})
}
}

impl ParserResult {
pub fn r#match(nodes: Vec<cst::Edge>, expected_terminals: Vec<TerminalKind>) -> Self {
pub fn r#match(nodes: Vec<cst::Edge>, expected_terminals: HashSet<TerminalKind>) -> Self {
ParserResult::Match(Match::new(nodes, expected_terminals))
}

pub fn pratt_operator_match(elements: Vec<PrattElement>) -> Self {
ParserResult::PrattOperatorMatch(PrattOperatorMatch::new(elements))
}

pub fn incomplete_match(nodes: Vec<cst::Edge>, expected_terminals: Vec<TerminalKind>) -> Self {
pub fn incomplete_match(
nodes: Vec<cst::Edge>,
expected_terminals: HashSet<TerminalKind>,
) -> Self {
ParserResult::IncompleteMatch(IncompleteMatch::new(nodes, expected_terminals))
}

/// Whenever a parser didn't run because it's disabled due to versioning. Shorthand for `no_match(HashSet::new())`.
pub fn disabled() -> Self {
Self::no_match(vec![])
Self::no_match(HashSet::new())
}

pub fn no_match(expected_terminals: Vec<TerminalKind>) -> Self {
pub fn no_match(expected_terminals: HashSet<TerminalKind>) -> Self {
ParserResult::NoMatch(NoMatch::new(expected_terminals))
}

Expand Down Expand Up @@ -123,11 +127,11 @@ impl ParserResult {
pub struct Match {
pub nodes: Vec<cst::Edge>,
/// Terminals that would have allowed for more progress. Collected for the purposes of error reporting.
pub expected_terminals: Vec<TerminalKind>,
pub expected_terminals: HashSet<TerminalKind>,
}

impl Match {
pub fn new(nodes: Vec<cst::Edge>, expected_terminals: Vec<TerminalKind>) -> Self {
pub fn new(nodes: Vec<cst::Edge>, expected_terminals: HashSet<TerminalKind>) -> Self {
Self {
nodes,
expected_terminals,
Expand Down Expand Up @@ -197,11 +201,11 @@ impl PrattOperatorMatch {
pub struct IncompleteMatch {
pub nodes: Vec<cst::Edge>,
/// Terminals that would have allowed for more progress. Collected for the purposes of error reporting.
pub expected_terminals: Vec<TerminalKind>,
pub expected_terminals: HashSet<TerminalKind>,
}

impl IncompleteMatch {
pub fn new(nodes: Vec<cst::Edge>, expected_terminals: Vec<TerminalKind>) -> Self {
pub fn new(nodes: Vec<cst::Edge>, expected_terminals: HashSet<TerminalKind>) -> Self {
Self {
nodes,
expected_terminals,
Expand Down Expand Up @@ -239,11 +243,11 @@ impl IncompleteMatch {
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct NoMatch {
/// Terminals that would have allowed for more progress. Collected for the purposes of error reporting.
pub expected_terminals: Vec<TerminalKind>,
pub expected_terminals: HashSet<TerminalKind>,
}

impl NoMatch {
pub fn new(expected_terminals: Vec<TerminalKind>) -> Self {
pub fn new(expected_terminals: HashSet<TerminalKind>) -> Self {
Self { expected_terminals }
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashSet;

use crate::cst::{self, Edge};
use crate::kinds::{EdgeLabel, NonterminalKind};
use crate::language::parser_support::parser_result::PrattElement::{
Expand Down Expand Up @@ -237,7 +239,7 @@ impl PrecedenceHelper {
// 3. Until we have a single expression.

match <[_; 1]>::try_from(elements) {
Ok([Expression { nodes }]) => ParserResult::r#match(nodes, vec![]),
Ok([Expression { nodes }]) => ParserResult::r#match(nodes, HashSet::new()),
Ok([head]) => unreachable!("Expected an expression: {:#?}", head),
Err(elems) => unreachable!("Expected a single element: {:#?}", elems),
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ impl ParserResult {
{
nodes.extend(leading_trivia);
if matches!(result_kind, ParseResultKind::Match) {
expected_terminals.push(expected);
expected_terminals.insert(expected);
}

let skipped = input.content(skipped_range.utf8());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashSet;

use crate::cst::{self, Edge};
use crate::kinds::{EdgeLabel, IsLexicalContext, TerminalKind};
use crate::language::lexer::Lexer;
Expand Down Expand Up @@ -41,7 +43,9 @@ impl SeparatedHelper {
// NOTE: We can't correctly attempt recovery until #600 lands, otherwise we'd risk misparses,
// as we need to stop at certain synchronizing terminals (and we can't reliably scan until
// a delimiter, as not every list is enclosed in a delimited group).
Some(..) | None => return ParserResult::r#match(accum, vec![separator]),
Some(..) | None => {
return ParserResult::r#match(accum, HashSet::from([separator]))
}
}
}
// Body was partially parsed, so try to recover by skipping terminals until we see a separator
Expand Down
43 changes: 24 additions & 19 deletions crates/codegen/runtime/cargo/src/runtime/parse_error/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::fmt;

use crate::diagnostic::{self, Diagnostic};
Expand All @@ -11,7 +11,7 @@ use crate::text_index::TextRange;
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ParseError {
pub(crate) text_range: TextRange,
pub(crate) terminals_that_would_have_allowed_more_progress: Vec<TerminalKind>,
pub(crate) terminals_that_would_have_allowed_more_progress: HashSet<TerminalKind>,
}

impl ParseError {
Expand All @@ -29,7 +29,7 @@ impl ParseError {
impl ParseError {
pub(crate) fn new(
text_range: TextRange,
terminals_that_would_have_allowed_more_progress: Vec<TerminalKind>,
terminals_that_would_have_allowed_more_progress: HashSet<TerminalKind>,
) -> Self {
Self {
text_range,
Expand All @@ -44,23 +44,28 @@ impl fmt::Display for ParseError {
.terminals_that_would_have_allowed_more_progress
.is_empty()
{
write!(f, "Expected end of file.")
} else {
let deduped = self
.terminals_that_would_have_allowed_more_progress
.iter()
.collect::<BTreeSet<_>>();

write!(f, "Expected ")?;

for kind in deduped.iter().take(deduped.len() - 1) {
write!(f, "{kind} or ")?;
}
let last = deduped.last().expect("we just checked that it's not empty");
write!(f, "{last}.")?;

Ok(())
return write!(f, "Expected end of file.");
}

let mut ordered = self
.terminals_that_would_have_allowed_more_progress
.iter()
.collect::<Vec<_>>();

ordered.sort();

let mut ordered = ordered.iter();
let first = ordered.next().expect("we just checked that it's not empty");

write!(f, "Expected {first}")?;

for kind in ordered {
write!(f, " or {kind}")?;
}

write!(f, ".")?;

Ok(())
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ impl PrecedenceParserDefinitionCodegen for PrecedenceParserDefinitionRef {
[inner @ cst::Edge { node: cst::Node::Nonterminal(node), .. }] if node.kind == NonterminalKind::#op_nonterminal_name => {
ParserResult::r#match(vec![inner.clone()], r#match.expected_terminals.clone())
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::no_match(HashSet::new()),
}
_ => ParserResult::no_match(vec![]),
_ => ParserResult::no_match(HashSet::new()),
}
};

Expand Down
Loading

0 comments on commit 6f32d0c

Please sign in to comment.