From 7bb650b12ae793a318dc5b7839fb93915c88828e Mon Sep 17 00:00:00 2001 From: Igor Matuszewski Date: Tue, 31 Oct 2023 11:52:42 +0100 Subject: [PATCH] Implement Iterator for Cursor (#625) Closes #609 Still needs tweaking to adapt to the current Cursor's behavior and needs more documentation/tests for the Visitor trait. --- .changeset/hip-mails-wonder.md | 5 + crates/codegen/parser/runtime/src/cursor.rs | 334 +++++++++++------- .../runtime/src/support/choice_helper.rs | 12 +- .../runtime/src/support/parser_function.rs | 11 +- .../runtime/src/support/parser_result.rs | 37 +- crates/codegen/parser/runtime/src/visitor.rs | 69 ++-- .../cargo/crate/src/generated/cursor.rs | 334 +++++++++++------- .../src/generated/support/choice_helper.rs | 12 +- .../src/generated/support/parser_function.rs | 11 +- .../src/generated/support/parser_result.rs | 37 +- .../cargo/crate/src/generated/visitor.rs | 69 ++-- .../tests/src/doc_examples/cursor_api.rs | 34 ++ .../tests/src/doc_examples/visitor_api.rs | 10 +- .../outputs/npm/crate/src/generated/cursor.rs | 334 +++++++++++------- .../src/generated/support/choice_helper.rs | 12 +- .../src/generated/support/parser_function.rs | 11 +- .../src/generated/support/parser_result.rs | 37 +- .../npm/crate/src/generated/visitor.rs | 69 ++-- .../utils/src/cst_snapshots/test_nodes.rs | 26 +- .../public/user-guide/cargo-crate/index.md | 6 +- 20 files changed, 807 insertions(+), 663 deletions(-) create mode 100644 .changeset/hip-mails-wonder.md diff --git a/.changeset/hip-mails-wonder.md b/.changeset/hip-mails-wonder.md new file mode 100644 index 0000000000..9028003d16 --- /dev/null +++ b/.changeset/hip-mails-wonder.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +The CST Cursor now implements the Iterator trait as part of the Rust API diff --git a/crates/codegen/parser/runtime/src/cursor.rs b/crates/codegen/parser/runtime/src/cursor.rs index 16b26eec78..658f5f0c40 100644 --- a/crates/codegen/parser/runtime/src/cursor.rs +++ 
b/crates/codegen/parser/runtime/src/cursor.rs @@ -1,3 +1,5 @@ +//! A cursor that can traverse a CST in a DFS pre-order fashion. + use std::rc::Rc; use super::{ @@ -6,41 +8,89 @@ use super::{ text_index::{TextIndex, TextRange}, }; +/// A [`PathNode`] that points to a [`RuleNode`]. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorPathElement { +struct PathRuleNode { rule_node: Rc, child_number: usize, text_offset: TextIndex, } +impl PathRuleNode { + fn into_path_node(self) -> PathNode { + PathNode { + node: Node::Rule(self.rule_node), + child_number: self.child_number, + text_offset: self.text_offset, + } + } +} + +/// A pointer to a [`Node`] in a CST, used by the [`Cursor`] to implement the traversal. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorLeaf { +struct PathNode { + /// The node the cursor is currently pointing to. node: Node, + /// The index of the current child node in the parent's children. + // Required to go to the next/previous sibling. child_number: usize, + /// Text offset that corresponds to the beginning of the currently pointed to node. text_offset: TextIndex, } -impl CursorLeaf { - pub fn text_range(&self) -> TextRange { +impl PathNode { + fn text_range(&self) -> TextRange { let start = self.text_offset; let end = start + self.node.text_len(); start..end } + + fn to_path_rule_node(&self) -> Option { + if let Node::Rule(rule_node) = &self.node { + Some(PathRuleNode { + rule_node: rule_node.clone(), + child_number: self.child_number, + text_offset: self.text_offset, + }) + } else { + None + } + } } +/// A cursor that can traverse a CST. +/// +/// Nodes are visited in a DFS pre-order traversal. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Cursor { - path: Vec, - leaf: CursorLeaf, + /// The list of ancestor rule nodes that the `current` node is a part of. + path: Vec, + /// The node the cursor is currently pointing to. + current: PathNode, + /// Whether the cursor is completed, i.e. 
at the root node as a result of traversal (or when `complete`d). + /// If `true`, the cursor cannot be moved. is_completed: bool, } -#[allow(dead_code)] +impl Iterator for Cursor { + type Item = Node; + + fn next(&mut self) -> Option { + if self.is_completed { + None + } else { + let cur = self.node(); + self.go_to_next(); + Some(cur) + } + } +} + impl Cursor { pub(crate) fn new(node: Node) -> Self { Self { path: vec![], - leaf: CursorLeaf { + current: PathNode { node, child_number: 0, text_offset: Default::default(), @@ -49,46 +99,49 @@ impl Cursor { } } + /// Resets the cursor to the root node. pub fn reset(&mut self) { self.complete(); self.is_completed = false; } + /// Completes the cursor, setting it to the root node. pub fn complete(&mut self) { - if let Some(path_element) = self.path.get(0) { - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node.clone()); - self.path.clear(); + if let Some(root) = self.path.drain(..).next() { + self.current = root.into_path_node(); } self.is_completed = true; } - // Unlike clone, this re-roots at the current node. - // It does preserve the correct text offset however, - // even though the path is reset. + /// Unlike `clone`, this re-roots at the current node. + /// It does preserve the correct text offset however, + /// even though the path is reset. pub fn spawn(&self) -> Self { Self { path: vec![], - leaf: self.leaf.clone(), + current: self.current.clone(), is_completed: false, } } + /// Whether the cursor can be moved. pub fn is_completed(&self) -> bool { self.is_completed } + /// Returns the currently pointed to [`Node`]. pub fn node(&self) -> Node { - self.leaf.node.clone() + self.current.node.clone() } + /// Returns the text offset that corresponds to the beginning of the currently pointed to node. 
pub fn text_offset(&self) -> TextIndex { - self.leaf.text_offset + self.current.text_offset } + /// Returns the text range that corresponds to the currently pointed to node. pub fn text_range(&self) -> TextRange { - self.leaf.text_range() + self.current.text_range() } pub fn path_rule_nodes(&self) -> Vec> { @@ -98,6 +151,9 @@ impl Cursor { .collect() } + /// Attempts to go to current node's next one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next(&mut self) -> bool { if self.is_completed { return false; @@ -106,9 +162,13 @@ impl Cursor { if !self.go_to_first_child() { return self.go_to_next_non_descendent(); } - return true; + + true } + /// Attempts to go to current node's next non-descendent. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next_non_descendent(&mut self) -> bool { if self.is_completed { return false; @@ -119,9 +179,13 @@ impl Cursor { return false; } } - return true; + + true } + /// Attempts to go to current node's previous one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_previous(&mut self) -> bool { if self.is_completed { return false; @@ -133,152 +197,163 @@ impl Cursor { } } while self.go_to_last_child() {} - return true; + + true } + /// Attempts to go to current node's parent. + /// + /// Returns `false` if the cursor is finished and at the root. 
pub fn go_to_parent(&mut self) -> bool { - if self.path.is_empty() { - self.is_completed = true; - return false; - } + match self.path.pop() { + Some(parent) => { + self.current = parent.into_path_node(); - let path_element = self.path.pop().unwrap(); - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node); - return true; + true + } + None => { + self.is_completed = true; + + false + } + } } + /// Attempts to go to current node's first child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. pub fn go_to_first_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = 0; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = *parent_text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + // If the current cursor is a node and it has children, go to first children + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.first().cloned() { + self.current = PathNode { + node: child_node, + text_offset: parent.text_offset, + child_number: 0, + }; + + self.path.push(parent); + return true; } } - return false; + false } + /// Attempts to go to current node's last child. + + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_last_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = parent_rule_node.children.len() - 1; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + let child_number = parent.rule_node.children.len() - 1; + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // This is cheaper than summing up the length of the children let text_offset = - *parent_text_offset + parent_rule_node.text_len - child_node.text_len(); - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + parent.text_offset + parent.rule_node.text_len - child_node.text_len(); + + self.path.push(parent); + + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's nth child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_nth_child(&mut self, child_number: usize) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // Sum up the length of the children before this child // TODO: it might sometimes be quicker to start from the end (like `go_to_last_child`) - let mut text_offset = *parent_text_offset; - for child in &parent_rule_node.children[..child_number] { - text_offset += child.text_len(); - } - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + let text_offset = parent.text_offset + + parent.rule_node.children[..child_number] + .iter() + .map(|child| child.text_len()) + .sum(); + + self.path.push(parent); + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's next sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. 
pub fn go_to_next_sibling(&mut self) -> bool { if self.is_completed { return false; } if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; if let Some(new_child) = parent_path_element.rule_node.children.get(new_child_number) { - self.leaf.text_offset += self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child.clone(); + self.current = PathNode { + node: new_child.clone(), + text_offset: self.current.text_offset + self.current.node.text_len(), + child_number: new_child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's previous sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. pub fn go_to_previous_sibling(&mut self) -> bool { if self.is_completed { return false; } - if self.leaf.child_number > 0 { + if self.current.child_number > 0 { if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; let new_child = parent_path_element.rule_node.children[new_child_number].clone(); - self.leaf.text_offset -= self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child; + + self.current = PathNode { + node: new_child, + text_offset: self.current.text_offset - self.current.node.text_len(), + child_number: new_child_number, + }; return true; } } - return false; + false } pub fn find_matching Option>(&mut self, filter_map: F) -> Option { @@ -288,52 +363,55 @@ impl Cursor { } self.go_to_next(); } - return None; + + None } - pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + /// In contract to `Iterator::find_*`, this does not consume the first item when found. 
+ fn find_noconsume Option, R>(&mut self, predicate: F) -> Option { while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_with_kind(kinds).cloned() { - return Some(token_node); + match predicate(&self.current.node) { + Some(result) => return Some(result), + _ => { + self.go_to_next(); + } } - self.go_to_next(); } - return None; + + None } + /// Finds the first token with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. + pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + self.find_noconsume(|node| node.as_token_with_kind(kinds).cloned()) + } + + /// Finds the first token node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. pub fn find_token_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_matching(&predicate) { - return Some(token_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_token_matching(&predicate).cloned()) } + /// Finds the first rule node with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. pub fn find_rule_with_kind(&mut self, kinds: &[RuleKind]) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_with_kind(kinds) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_with_kind(kinds).cloned()) } + /// Finds the first rule node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. 
pub fn find_rule_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_matching(&predicate) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned()) } } diff --git a/crates/codegen/parser/runtime/src/support/choice_helper.rs b/crates/codegen/parser/runtime/src/support/choice_helper.rs index 27d6b6713a..8c6cd9ad5e 100644 --- a/crates/codegen/parser/runtime/src/support/choice_helper.rs +++ b/crates/codegen/parser/runtime/src/support/choice_helper.rs @@ -3,7 +3,7 @@ use std::ops::ControlFlow; use crate::{cst, kinds::TokenKind, parse_error::ParseError, text_index::TextIndex}; -use super::{context::Marker, parser_result::DescendentsIter, ParserContext, ParserResult}; +use super::{context::Marker, ParserContext, ParserResult}; /// Starting from a given position in the input, this helper will try to pick (and remember) a best match. Settles on /// a first full match if possible, otherwise on the best incomplete match. 
@@ -145,9 +145,11 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize { }; nodes - .descendents() - .filter_map(cst::Node::as_token) - .filter(|tok| tok.kind != TokenKind::SKIPPED) - .map(|tok| tok.text.len()) + .iter() + .flat_map(cst::Node::cursor) + .filter_map(|node| match node { + cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()), + _ => None, + }) .sum() } diff --git a/crates/codegen/parser/runtime/src/support/parser_function.rs b/crates/codegen/parser/runtime/src/support/parser_function.rs index a9ca991104..22355d9378 100644 --- a/crates/codegen/parser/runtime/src/support/parser_function.rs +++ b/crates/codegen/parser/runtime/src/support/parser_function.rs @@ -90,19 +90,16 @@ where errors, } } else { + let parse_tree = cst::Node::Rule(topmost_rule); // Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes debug_assert_eq!( errors.len() > 0, - topmost_rule - .children - .descendents() + parse_tree + .cursor() .any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some()) ); - ParseOutput { - parse_tree: cst::Node::Rule(topmost_rule), - errors, - } + ParseOutput { parse_tree, errors } } } } diff --git a/crates/codegen/parser/runtime/src/support/parser_result.rs b/crates/codegen/parser/runtime/src/support/parser_result.rs index e14889f620..aea9ebdc51 100644 --- a/crates/codegen/parser/runtime/src/support/parser_result.rs +++ b/crates/codegen/parser/runtime/src/support/parser_result.rs @@ -75,40 +75,6 @@ impl ParserResult { } } -// DFS iterator over the descendents of a node. 
-pub(crate) struct DescendentsUnordered<'a> { - stack: Vec<&'a cst::Node>, -} - -impl<'a> Iterator for DescendentsUnordered<'a> { - type Item = &'a cst::Node; - - fn next(&mut self) -> Option<&'a cst::Node> { - self.stack.pop().map(|node| { - if let Some(node) = node.as_rule() { - self.stack.extend(node.children.iter()); - } - - node - }) - } -} - -pub(crate) trait DescendentsIter<'a> { - fn descendents(self) -> DescendentsUnordered<'a>; -} - -impl<'a, T> DescendentsIter<'a> for T -where - T: IntoIterator + 'a, -{ - fn descendents(self) -> DescendentsUnordered<'a> { - DescendentsUnordered { - stack: self.into_iter().collect(), - } - } -} - #[derive(PartialEq, Eq, Clone, Debug)] pub struct Match { pub nodes: Vec, @@ -126,7 +92,8 @@ impl Match { pub fn is_full_recursive(&self) -> bool { self.nodes - .descendents() + .iter() + .flat_map(cst::Node::cursor) .all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none()) } } diff --git a/crates/codegen/parser/runtime/src/visitor.rs b/crates/codegen/parser/runtime/src/visitor.rs index 96d08233eb..1562bc0e59 100644 --- a/crates/codegen/parser/runtime/src/visitor.rs +++ b/crates/codegen/parser/runtime/src/visitor.rs @@ -1,83 +1,76 @@ +use std::ops::ControlFlow; use std::rc::Rc; use super::{cst::*, cursor::Cursor}; -#[allow(unused_variables)] +/// A Visitor pattern for traversing the CST. +/// +/// The trait supports fallible iteration, i.e. the visitor can early return an error from the visit. pub trait Visitor { + /// Called when the [`Visitor`] enters a [`RuleNode`]. fn rule_enter( &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorEntryResponse::StepIn) + _node: &Rc, + _cursor: &Cursor, + ) -> Result, E> { + Ok(ControlFlow::Continue(Step::In)) } - fn rule_exit( - &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] exits a [`RuleNode`]. 
+ fn rule_exit(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } - fn token(&mut self, node: &Rc, cursor: &Cursor) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] enters a [`TokenNode`]. + fn token(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } } -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorEntryResponse { - Quit, - StepIn, - StepOver, -} - -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorExitResponse { - Quit, - Continue, +/// Whether the [`Visitor`] should should enter the children of a [`RuleNode`] or not. +pub enum Step { + In, + Over, } -#[allow(dead_code)] impl Cursor { pub fn drive_visitor>( &mut self, visitor: &mut V, - ) -> Result { + ) -> Result, E> { if self.is_completed() { - return Ok(VisitorExitResponse::Continue); + return Ok(ControlFlow::Continue(())); } + loop { // Node clone is cheap because it's just an enum around an Rc match self.node() { Node::Rule(rule_node) => { match visitor.rule_enter(&rule_node, self)? { - VisitorEntryResponse::Quit => return Ok(VisitorExitResponse::Quit), - VisitorEntryResponse::StepIn => { + ControlFlow::Break(()) => return Ok(ControlFlow::Break(())), + ControlFlow::Continue(Step::In) => { if self.go_to_first_child() { self.drive_visitor(visitor)?; self.go_to_parent(); } } - VisitorEntryResponse::StepOver => {} + ControlFlow::Continue(Step::Over) => {} } - if visitor.rule_exit(&rule_node, self)? == VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.rule_exit(&rule_node, self)? == ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } Node::Token(token_node) => { - if visitor.token(&token_node, self)? == VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.token(&token_node, self)? 
== ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } } + if !self.go_to_next_sibling() { - break; + return Ok(ControlFlow::Continue(())); } } - Ok(VisitorExitResponse::Continue) } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs b/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs index 2567f28e28..59e7a60d6a 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/cursor.rs @@ -1,5 +1,7 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +//! A cursor that can traverse a CST in a DFS pre-order fashion. + use std::rc::Rc; use super::{ @@ -8,41 +10,89 @@ use super::{ text_index::{TextIndex, TextRange}, }; +/// A [`PathNode`] that points to a [`RuleNode`]. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorPathElement { +struct PathRuleNode { rule_node: Rc, child_number: usize, text_offset: TextIndex, } +impl PathRuleNode { + fn into_path_node(self) -> PathNode { + PathNode { + node: Node::Rule(self.rule_node), + child_number: self.child_number, + text_offset: self.text_offset, + } + } +} + +/// A pointer to a [`Node`] in a CST, used by the [`Cursor`] to implement the traversal. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorLeaf { +struct PathNode { + /// The node the cursor is currently pointing to. node: Node, + /// The index of the current child node in the parent's children. + // Required to go to the next/previous sibling. child_number: usize, + /// Text offset that corresponds to the beginning of the currently pointed to node. 
text_offset: TextIndex, } -impl CursorLeaf { - pub fn text_range(&self) -> TextRange { +impl PathNode { + fn text_range(&self) -> TextRange { let start = self.text_offset; let end = start + self.node.text_len(); start..end } + + fn to_path_rule_node(&self) -> Option { + if let Node::Rule(rule_node) = &self.node { + Some(PathRuleNode { + rule_node: rule_node.clone(), + child_number: self.child_number, + text_offset: self.text_offset, + }) + } else { + None + } + } } +/// A cursor that can traverse a CST. +/// +/// Nodes are visited in a DFS pre-order traversal. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Cursor { - path: Vec, - leaf: CursorLeaf, + /// The list of ancestor rule nodes that the `current` node is a part of. + path: Vec, + /// The node the cursor is currently pointing to. + current: PathNode, + /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). + /// If `true`, the cursor cannot be moved. is_completed: bool, } -#[allow(dead_code)] +impl Iterator for Cursor { + type Item = Node; + + fn next(&mut self) -> Option { + if self.is_completed { + None + } else { + let cur = self.node(); + self.go_to_next(); + Some(cur) + } + } +} + impl Cursor { pub(crate) fn new(node: Node) -> Self { Self { path: vec![], - leaf: CursorLeaf { + current: PathNode { node, child_number: 0, text_offset: Default::default(), @@ -51,46 +101,49 @@ impl Cursor { } } + /// Resets the cursor to the root node. pub fn reset(&mut self) { self.complete(); self.is_completed = false; } + /// Completes the cursor, setting it to the root node. 
pub fn complete(&mut self) { - if let Some(path_element) = self.path.get(0) { - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node.clone()); - self.path.clear(); + if let Some(root) = self.path.drain(..).next() { + self.current = root.into_path_node(); } self.is_completed = true; } - // Unlike clone, this re-roots at the current node. - // It does preserve the correct text offset however, - // even though the path is reset. + /// Unlike `clone`, this re-roots at the current node. + /// It does preserve the correct text offset however, + /// even though the path is reset. pub fn spawn(&self) -> Self { Self { path: vec![], - leaf: self.leaf.clone(), + current: self.current.clone(), is_completed: false, } } + /// Whether the cursor can be moved. pub fn is_completed(&self) -> bool { self.is_completed } + /// Returns the currently pointed to [`Node`]. pub fn node(&self) -> Node { - self.leaf.node.clone() + self.current.node.clone() } + /// Returns the text offset that corresponds to the beginning of the currently pointed to node. pub fn text_offset(&self) -> TextIndex { - self.leaf.text_offset + self.current.text_offset } + /// Returns the text range that corresponds to the currently pointed to node. pub fn text_range(&self) -> TextRange { - self.leaf.text_range() + self.current.text_range() } pub fn path_rule_nodes(&self) -> Vec> { @@ -100,6 +153,9 @@ impl Cursor { .collect() } + /// Attempts to go to current node's next one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next(&mut self) -> bool { if self.is_completed { return false; @@ -108,9 +164,13 @@ impl Cursor { if !self.go_to_first_child() { return self.go_to_next_non_descendent(); } - return true; + + true } + /// Attempts to go to current node's next non-descendent. 
+ /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next_non_descendent(&mut self) -> bool { if self.is_completed { return false; @@ -121,9 +181,13 @@ impl Cursor { return false; } } - return true; + + true } + /// Attempts to go to current node's previous one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_previous(&mut self) -> bool { if self.is_completed { return false; @@ -135,152 +199,163 @@ impl Cursor { } } while self.go_to_last_child() {} - return true; + + true } + /// Attempts to go to current node's parent. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_parent(&mut self) -> bool { - if self.path.is_empty() { - self.is_completed = true; - return false; - } + match self.path.pop() { + Some(parent) => { + self.current = parent.into_path_node(); - let path_element = self.path.pop().unwrap(); - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node); - return true; + true + } + None => { + self.is_completed = true; + + false + } + } } + /// Attempts to go to current node's first child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_first_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = 0; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = *parent_text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + // If the current cursor is a node and it has children, go to first children + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.first().cloned() { + self.current = PathNode { + node: child_node, + text_offset: parent.text_offset, + child_number: 0, + }; + + self.path.push(parent); + return true; } } - return false; + false } + /// Attempts to go to current node's last child. + + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_last_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = parent_rule_node.children.len() - 1; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + let child_number = parent.rule_node.children.len() - 1; + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // This is cheaper than summing up the length of the children let text_offset = - *parent_text_offset + parent_rule_node.text_len - child_node.text_len(); - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + parent.text_offset + parent.rule_node.text_len - child_node.text_len(); + + self.path.push(parent); + + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's nth child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_nth_child(&mut self, child_number: usize) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // Sum up the length of the children before this child // TODO: it might sometimes be quicker to start from the end (like `go_to_last_child`) - let mut text_offset = *parent_text_offset; - for child in &parent_rule_node.children[..child_number] { - text_offset += child.text_len(); - } - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + let text_offset = parent.text_offset + + parent.rule_node.children[..child_number] + .iter() + .map(|child| child.text_len()) + .sum(); + + self.path.push(parent); + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's next sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. 
pub fn go_to_next_sibling(&mut self) -> bool { if self.is_completed { return false; } if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; if let Some(new_child) = parent_path_element.rule_node.children.get(new_child_number) { - self.leaf.text_offset += self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child.clone(); + self.current = PathNode { + node: new_child.clone(), + text_offset: self.current.text_offset + self.current.node.text_len(), + child_number: new_child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's previous sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. pub fn go_to_previous_sibling(&mut self) -> bool { if self.is_completed { return false; } - if self.leaf.child_number > 0 { + if self.current.child_number > 0 { if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; let new_child = parent_path_element.rule_node.children[new_child_number].clone(); - self.leaf.text_offset -= self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child; + + self.current = PathNode { + node: new_child, + text_offset: self.current.text_offset - self.current.node.text_len(), + child_number: new_child_number, + }; return true; } } - return false; + false } pub fn find_matching Option>(&mut self, filter_map: F) -> Option { @@ -290,52 +365,55 @@ impl Cursor { } self.go_to_next(); } - return None; + + None } - pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + /// In contrast to `Iterator::find_*`, this does not consume the first item when found. 
+ fn find_noconsume Option, R>(&mut self, predicate: F) -> Option { while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_with_kind(kinds).cloned() { - return Some(token_node); + match predicate(&self.current.node) { + Some(result) => return Some(result), + _ => { + self.go_to_next(); + } } - self.go_to_next(); } - return None; + + None } + /// Finds the first token with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. + pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + self.find_noconsume(|node| node.as_token_with_kind(kinds).cloned()) + } + + /// Finds the first token node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. pub fn find_token_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_matching(&predicate) { - return Some(token_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_token_matching(&predicate).cloned()) } + /// Finds the first rule node with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. pub fn find_rule_with_kind(&mut self, kinds: &[RuleKind]) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_with_kind(kinds) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_with_kind(kinds).cloned()) } + /// Finds the first rule node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. 
pub fn find_rule_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_matching(&predicate) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned()) } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs index 396e7d3f84..0b9ab410a2 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs @@ -5,7 +5,7 @@ use std::ops::ControlFlow; use crate::{cst, kinds::TokenKind, parse_error::ParseError, text_index::TextIndex}; -use super::{context::Marker, parser_result::DescendentsIter, ParserContext, ParserResult}; +use super::{context::Marker, ParserContext, ParserResult}; /// Starting from a given position in the input, this helper will try to pick (and remember) a best match. Settles on /// a first full match if possible, otherwise on the best incomplete match. 
@@ -147,9 +147,11 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize { }; nodes - .descendents() - .filter_map(cst::Node::as_token) - .filter(|tok| tok.kind != TokenKind::SKIPPED) - .map(|tok| tok.text.len()) + .iter() + .flat_map(cst::Node::cursor) + .filter_map(|node| match node { + cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()), + _ => None, + }) .sum() } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs b/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs index 51311d9b4e..da70a20109 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs @@ -92,19 +92,16 @@ where errors, } } else { + let parse_tree = cst::Node::Rule(topmost_rule); // Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes debug_assert_eq!( errors.len() > 0, - topmost_rule - .children - .descendents() + parse_tree + .cursor() .any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some()) ); - ParseOutput { - parse_tree: cst::Node::Rule(topmost_rule), - errors, - } + ParseOutput { parse_tree, errors } } } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs b/crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs index 59d59d85cf..c177478291 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs @@ -77,40 +77,6 @@ impl ParserResult { } } -// DFS iterator over the descendents of a node. 
-pub(crate) struct DescendentsUnordered<'a> { - stack: Vec<&'a cst::Node>, -} - -impl<'a> Iterator for DescendentsUnordered<'a> { - type Item = &'a cst::Node; - - fn next(&mut self) -> Option<&'a cst::Node> { - self.stack.pop().map(|node| { - if let Some(node) = node.as_rule() { - self.stack.extend(node.children.iter()); - } - - node - }) - } -} - -pub(crate) trait DescendentsIter<'a> { - fn descendents(self) -> DescendentsUnordered<'a>; -} - -impl<'a, T> DescendentsIter<'a> for T -where - T: IntoIterator + 'a, -{ - fn descendents(self) -> DescendentsUnordered<'a> { - DescendentsUnordered { - stack: self.into_iter().collect(), - } - } -} - #[derive(PartialEq, Eq, Clone, Debug)] pub struct Match { pub nodes: Vec, @@ -128,7 +94,8 @@ impl Match { pub fn is_full_recursive(&self) -> bool { self.nodes - .descendents() + .iter() + .flat_map(cst::Node::cursor) .all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none()) } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/visitor.rs b/crates/solidity/outputs/cargo/crate/src/generated/visitor.rs index 5aac41de28..2a8585df5d 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/visitor.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/visitor.rs @@ -1,85 +1,78 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +use std::ops::ControlFlow; use std::rc::Rc; use super::{cst::*, cursor::Cursor}; -#[allow(unused_variables)] +/// A Visitor pattern for traversing the CST. +/// +/// The trait supports fallible iteration, i.e. the visitor can early return an error from the visit. pub trait Visitor { + /// Called when the [`Visitor`] enters a [`RuleNode`]. 
fn rule_enter( &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorEntryResponse::StepIn) + _node: &Rc, + _cursor: &Cursor, + ) -> Result, E> { + Ok(ControlFlow::Continue(Step::In)) } - fn rule_exit( - &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] exits a [`RuleNode`]. + fn rule_exit(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } - fn token(&mut self, node: &Rc, cursor: &Cursor) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] enters a [`TokenNode`]. + fn token(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } } -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorEntryResponse { - Quit, - StepIn, - StepOver, -} - -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorExitResponse { - Quit, - Continue, +/// Whether the [`Visitor`] should enter the children of a [`RuleNode`] or not. +pub enum Step { + In, + Over, } -#[allow(dead_code)] impl Cursor { pub fn drive_visitor>( &mut self, visitor: &mut V, - ) -> Result { + ) -> Result, E> { if self.is_completed() { - return Ok(VisitorExitResponse::Continue); + return Ok(ControlFlow::Continue(())); } + loop { // Node clone is cheap because it's just an enum around an Rc match self.node() { Node::Rule(rule_node) => { match visitor.rule_enter(&rule_node, self)? { - VisitorEntryResponse::Quit => return Ok(VisitorExitResponse::Quit), - VisitorEntryResponse::StepIn => { + ControlFlow::Break(()) => return Ok(ControlFlow::Break(())), + ControlFlow::Continue(Step::In) => { if self.go_to_first_child() { self.drive_visitor(visitor)?; self.go_to_parent(); } } - VisitorEntryResponse::StepOver => {} + ControlFlow::Continue(Step::Over) => {} } - if visitor.rule_exit(&rule_node, self)? 
== VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.rule_exit(&rule_node, self)? == ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } Node::Token(token_node) => { - if visitor.token(&token_node, self)? == VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.token(&token_node, self)? == ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } } + if !self.go_to_next_sibling() { - break; + return Ok(ControlFlow::Continue(())); } } - Ok(VisitorExitResponse::Continue) } } diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/cursor_api.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/cursor_api.rs index 61954d5a25..a5b4effde8 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/cursor_api.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/cursor_api.rs @@ -2,6 +2,7 @@ use anyhow::Result; use semver::Version; use slang_solidity::{ + cst::Node, kinds::{ProductionKind, RuleKind, TokenKind}, language::Language, }; @@ -90,3 +91,36 @@ fn cursor_api_using_iter() -> Result<()> { return Ok(()); } + +#[test] +fn cursor_as_iter() -> Result<()> { + let language = Language::new(Version::parse("0.8.0")?)?; + let parse_output = language.parse(ProductionKind::ContractDefinition, "contract Foo {}"); + + let mut cursor = parse_output.parse_tree().cursor(); + assert_eq!( + cursor.node().as_rule().unwrap().kind, + RuleKind::ContractDefinition + ); + + macro_rules! assert_next_is { + ($pattern:pat $(if $guard:expr)? $(,)?) 
=> { + assert!(matches!(cursor.next(), $pattern $(if $guard)?)); + }; + } + + assert_next_is!(Some(Node::Rule(rule)) if rule.kind == RuleKind::ContractDefinition); + { + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::ContractKeyword); + assert_next_is!(Some(Node::Rule(rule)) if rule.kind == RuleKind::LeadingTrivia); + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::Whitespace); + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::Identifier && token.text == "Foo"); + assert_next_is!(Some(Node::Rule(rule)) if rule.kind == RuleKind::LeadingTrivia); + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::Whitespace); + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::OpenBrace); + assert_next_is!(Some(Node::Token(token)) if token.kind == TokenKind::CloseBrace); + } + assert_next_is!(None); + + Ok(()) +} diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/visitor_api.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/visitor_api.rs index eea12776f6..832a91e1c2 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/visitor_api.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/visitor_api.rs @@ -1,3 +1,4 @@ +use std::ops::ControlFlow; use std::rc::Rc; use anyhow::{bail, ensure, Error, Result}; @@ -8,7 +9,7 @@ use slang_solidity::{ cursor::Cursor, kinds::{ProductionKind, RuleKind, TokenKind}, language::Language, - visitor::{Visitor, VisitorEntryResponse}, + visitor::{Step, Visitor}, }; struct ContractCollector { @@ -20,7 +21,7 @@ impl Visitor for ContractCollector { &mut self, node: &Rc, _cursor: &Cursor, - ) -> Result { + ) -> Result> { if node.kind == RuleKind::ContractDefinition { if let Node::Token(token) = &node.children[2] { ensure!(token.kind == TokenKind::Identifier); @@ -28,10 +29,11 @@ impl Visitor for ContractCollector { } else { bail!("Expected contract identifier: {node:?}"); }; - return 
Ok(VisitorEntryResponse::StepOver); + + return Ok(ControlFlow::Continue(Step::Over)); } - return Ok(VisitorEntryResponse::StepIn); + Ok(ControlFlow::Continue(Step::In)) } } diff --git a/crates/solidity/outputs/npm/crate/src/generated/cursor.rs b/crates/solidity/outputs/npm/crate/src/generated/cursor.rs index 2567f28e28..59e7a60d6a 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/cursor.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/cursor.rs @@ -1,5 +1,7 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +//! A cursor that can traverse a CST in a DFS pre-order fashion. + use std::rc::Rc; use super::{ @@ -8,41 +10,89 @@ use super::{ text_index::{TextIndex, TextRange}, }; +/// A [`PathNode`] that points to a [`RuleNode`]. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorPathElement { +struct PathRuleNode { rule_node: Rc, child_number: usize, text_offset: TextIndex, } +impl PathRuleNode { + fn into_path_node(self) -> PathNode { + PathNode { + node: Node::Rule(self.rule_node), + child_number: self.child_number, + text_offset: self.text_offset, + } + } +} + +/// A pointer to a [`Node`] in a CST, used by the [`Cursor`] to implement the traversal. #[derive(Clone, Debug, PartialEq, Eq)] -struct CursorLeaf { +struct PathNode { + /// The node the cursor is currently pointing to. node: Node, + /// The index of the current child node in the parent's children. + // Required to go to the next/previous sibling. child_number: usize, + /// Text offset that corresponds to the beginning of the currently pointed to node. 
text_offset: TextIndex, } -impl CursorLeaf { - pub fn text_range(&self) -> TextRange { +impl PathNode { + fn text_range(&self) -> TextRange { let start = self.text_offset; let end = start + self.node.text_len(); start..end } + + fn to_path_rule_node(&self) -> Option { + if let Node::Rule(rule_node) = &self.node { + Some(PathRuleNode { + rule_node: rule_node.clone(), + child_number: self.child_number, + text_offset: self.text_offset, + }) + } else { + None + } + } } +/// A cursor that can traverse a CST. +/// +/// Nodes are visited in a DFS pre-order traversal. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Cursor { - path: Vec, - leaf: CursorLeaf, + /// The list of ancestor rule nodes that the `current` node is a part of. + path: Vec, + /// The node the cursor is currently pointing to. + current: PathNode, + /// Whether the cursor is completed, i.e. at the root node as a result of traversal (or when `complete`d). + /// If `true`, the cursor cannot be moved. is_completed: bool, } -#[allow(dead_code)] +impl Iterator for Cursor { + type Item = Node; + + fn next(&mut self) -> Option { + if self.is_completed { + None + } else { + let cur = self.node(); + self.go_to_next(); + Some(cur) + } + } +} + impl Cursor { pub(crate) fn new(node: Node) -> Self { Self { path: vec![], - leaf: CursorLeaf { + current: PathNode { node, child_number: 0, text_offset: Default::default(), @@ -51,46 +101,49 @@ impl Cursor { } } + /// Resets the cursor to the root node. pub fn reset(&mut self) { self.complete(); self.is_completed = false; } + /// Completes the cursor, setting it to the root node. 
pub fn complete(&mut self) { - if let Some(path_element) = self.path.get(0) { - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node.clone()); - self.path.clear(); + if let Some(root) = self.path.drain(..).next() { + self.current = root.into_path_node(); } self.is_completed = true; } - // Unlike clone, this re-roots at the current node. - // It does preserve the correct text offset however, - // even though the path is reset. + /// Unlike `clone`, this re-roots at the current node. + /// It does preserve the correct text offset however, + /// even though the path is reset. pub fn spawn(&self) -> Self { Self { path: vec![], - leaf: self.leaf.clone(), + current: self.current.clone(), is_completed: false, } } + /// Whether the cursor is completed, i.e. it can no longer be moved. pub fn is_completed(&self) -> bool { self.is_completed } + /// Returns the currently pointed to [`Node`]. pub fn node(&self) -> Node { - self.leaf.node.clone() + self.current.node.clone() } + /// Returns the text offset that corresponds to the beginning of the currently pointed to node. pub fn text_offset(&self) -> TextIndex { - self.leaf.text_offset + self.current.text_offset } + /// Returns the text range that corresponds to the currently pointed to node. pub fn text_range(&self) -> TextRange { - self.leaf.text_range() + self.current.text_range() } pub fn path_rule_nodes(&self) -> Vec> { @@ -100,6 +153,9 @@ impl Cursor { .collect() } + /// Attempts to go to current node's next one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next(&mut self) -> bool { if self.is_completed { return false; @@ -108,9 +164,13 @@ impl Cursor { if !self.go_to_first_child() { return self.go_to_next_non_descendent(); } - return true; + + true } + /// Attempts to go to current node's next non-descendent. 
+ /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_next_non_descendent(&mut self) -> bool { if self.is_completed { return false; @@ -121,9 +181,13 @@ impl Cursor { return false; } } - return true; + + true } + /// Attempts to go to current node's previous one, according to the DFS pre-order traversal. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_previous(&mut self) -> bool { if self.is_completed { return false; @@ -135,152 +199,163 @@ impl Cursor { } } while self.go_to_last_child() {} - return true; + + true } + /// Attempts to go to current node's parent. + /// + /// Returns `false` if the cursor is finished and at the root. pub fn go_to_parent(&mut self) -> bool { - if self.path.is_empty() { - self.is_completed = true; - return false; - } + match self.path.pop() { + Some(parent) => { + self.current = parent.into_path_node(); - let path_element = self.path.pop().unwrap(); - self.leaf.text_offset = path_element.text_offset; - self.leaf.child_number = path_element.child_number; - self.leaf.node = Node::Rule(path_element.rule_node); - return true; + true + } + None => { + self.is_completed = true; + + false + } + } } + /// Attempts to go to current node's first child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_first_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = 0; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = *parent_text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + // If the current node is a rule node and it has children, go to its first child + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.first().cloned() { + self.current = PathNode { + node: child_node, + text_offset: parent.text_offset, + child_number: 0, + }; + + self.path.push(parent); + return true; } } - return false; + false } + /// Attempts to go to current node's last child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_last_child(&mut self) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - let child_number = parent_rule_node.children.len() - 1; - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + let child_number = parent.rule_node.children.len() - 1; + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // This is cheaper than summing up the length of the children let text_offset = - *parent_text_offset + parent_rule_node.text_len - child_node.text_len(); - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + parent.text_offset + parent.rule_node.text_len - child_node.text_len(); + + self.path.push(parent); + + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's nth child. + /// + /// Returns `false` if the cursor is finished or there's no child to go to. 
pub fn go_to_nth_child(&mut self, child_number: usize) -> bool { if self.is_completed { return false; } - // Check that the leaf is a rule node, and destructure if so - if let CursorLeaf { - node: Node::Rule(parent_rule_node), - text_offset: parent_text_offset, - child_number: parent_child_number, - } = &self.leaf - { - if let Some(child_node) = parent_rule_node.children.get(child_number).cloned() { + if let Some(parent) = self.current.to_path_rule_node() { + if let Some(child_node) = parent.rule_node.children.get(child_number).cloned() { // Sum up the length of the children before this child // TODO: it might sometimes be quicker to start from the end (like `go_to_last_child`) - let mut text_offset = *parent_text_offset; - for child in &parent_rule_node.children[..child_number] { - text_offset += child.text_len(); - } - self.path.push(CursorPathElement { - rule_node: parent_rule_node.clone(), - child_number: *parent_child_number, - text_offset: *parent_text_offset, - }); - self.leaf.text_offset = text_offset; - self.leaf.child_number = child_number; - self.leaf.node = child_node; + let text_offset = parent.text_offset + + parent.rule_node.children[..child_number] + .iter() + .map(|child| child.text_len()) + .sum(); + + self.path.push(parent); + self.current = PathNode { + node: child_node, + text_offset, + child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's next sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. 
pub fn go_to_next_sibling(&mut self) -> bool { if self.is_completed { return false; } if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; if let Some(new_child) = parent_path_element.rule_node.children.get(new_child_number) { - self.leaf.text_offset += self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child.clone(); + self.current = PathNode { + node: new_child.clone(), + text_offset: self.current.text_offset + self.current.node.text_len(), + child_number: new_child_number, + }; + return true; } } - return false; + false } + /// Attempts to go to current node's previous sibling. + /// + /// Returns `false` if the cursor is finished or there's no sibling to go to. pub fn go_to_previous_sibling(&mut self) -> bool { if self.is_completed { return false; } - if self.leaf.child_number > 0 { + if self.current.child_number > 0 { if let Some(parent_path_element) = self.path.last() { - let new_child_number = self.leaf.child_number + 1; + let new_child_number = self.current.child_number + 1; let new_child = parent_path_element.rule_node.children[new_child_number].clone(); - self.leaf.text_offset -= self.leaf.node.text_len(); - self.leaf.child_number = new_child_number; - self.leaf.node = new_child; + + self.current = PathNode { + node: new_child, + text_offset: self.current.text_offset - self.current.node.text_len(), + child_number: new_child_number, + }; return true; } } - return false; + false } pub fn find_matching Option>(&mut self, filter_map: F) -> Option { @@ -290,52 +365,55 @@ impl Cursor { } self.go_to_next(); } - return None; + + None } - pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + /// In contrast to `Iterator::find_*`, this does not consume the first item when found. 
+ fn find_noconsume Option, R>(&mut self, predicate: F) -> Option { while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_with_kind(kinds).cloned() { - return Some(token_node); + match predicate(&self.current.node) { + Some(result) => return Some(result), + _ => { + self.go_to_next(); + } } - self.go_to_next(); } - return None; + + None } + /// Finds the first token with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. + pub fn find_token_with_kind(&mut self, kinds: &[TokenKind]) -> Option> { + self.find_noconsume(|node| node.as_token_with_kind(kinds).cloned()) + } + + /// Finds the first token node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. pub fn find_token_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(token_node) = self.leaf.node.as_token_matching(&predicate) { - return Some(token_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_token_matching(&predicate).cloned()) } + /// Finds the first rule node with either of the given kinds. + /// + /// Does not consume the iterator if the first item matches. pub fn find_rule_with_kind(&mut self, kinds: &[RuleKind]) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_with_kind(kinds) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_with_kind(kinds).cloned()) } + /// Finds the first rule node matching the given predicate. + /// + /// Does not consume the iterator if the first item matches. 
pub fn find_rule_matching) -> bool>( &mut self, predicate: F, ) -> Option> { - while !self.is_completed { - if let Some(rule_node) = self.leaf.node.as_rule_matching(&predicate) { - return Some(rule_node.clone()); - } - self.go_to_next(); - } - return None; + self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned()) } } diff --git a/crates/solidity/outputs/npm/crate/src/generated/support/choice_helper.rs b/crates/solidity/outputs/npm/crate/src/generated/support/choice_helper.rs index 396e7d3f84..0b9ab410a2 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/support/choice_helper.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/support/choice_helper.rs @@ -5,7 +5,7 @@ use std::ops::ControlFlow; use crate::{cst, kinds::TokenKind, parse_error::ParseError, text_index::TextIndex}; -use super::{context::Marker, parser_result::DescendentsIter, ParserContext, ParserResult}; +use super::{context::Marker, ParserContext, ParserResult}; /// Starting from a given position in the input, this helper will try to pick (and remember) a best match. Settles on /// a first full match if possible, otherwise on the best incomplete match. 
@@ -147,9 +147,11 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize { }; nodes - .descendents() - .filter_map(cst::Node::as_token) - .filter(|tok| tok.kind != TokenKind::SKIPPED) - .map(|tok| tok.text.len()) + .iter() + .flat_map(cst::Node::cursor) + .filter_map(|node| match node { + cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()), + _ => None, + }) .sum() } diff --git a/crates/solidity/outputs/npm/crate/src/generated/support/parser_function.rs b/crates/solidity/outputs/npm/crate/src/generated/support/parser_function.rs index 51311d9b4e..da70a20109 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/support/parser_function.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/support/parser_function.rs @@ -92,19 +92,16 @@ where errors, } } else { + let parse_tree = cst::Node::Rule(topmost_rule); // Sanity check: Make sure that succesful parse is equivalent to not having any SKIPPED nodes debug_assert_eq!( errors.len() > 0, - topmost_rule - .children - .descendents() + parse_tree + .cursor() .any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some()) ); - ParseOutput { - parse_tree: cst::Node::Rule(topmost_rule), - errors, - } + ParseOutput { parse_tree, errors } } } } diff --git a/crates/solidity/outputs/npm/crate/src/generated/support/parser_result.rs b/crates/solidity/outputs/npm/crate/src/generated/support/parser_result.rs index 59d59d85cf..c177478291 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/support/parser_result.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/support/parser_result.rs @@ -77,40 +77,6 @@ impl ParserResult { } } -// DFS iterator over the descendents of a node. 
-pub(crate) struct DescendentsUnordered<'a> { - stack: Vec<&'a cst::Node>, -} - -impl<'a> Iterator for DescendentsUnordered<'a> { - type Item = &'a cst::Node; - - fn next(&mut self) -> Option<&'a cst::Node> { - self.stack.pop().map(|node| { - if let Some(node) = node.as_rule() { - self.stack.extend(node.children.iter()); - } - - node - }) - } -} - -pub(crate) trait DescendentsIter<'a> { - fn descendents(self) -> DescendentsUnordered<'a>; -} - -impl<'a, T> DescendentsIter<'a> for T -where - T: IntoIterator + 'a, -{ - fn descendents(self) -> DescendentsUnordered<'a> { - DescendentsUnordered { - stack: self.into_iter().collect(), - } - } -} - #[derive(PartialEq, Eq, Clone, Debug)] pub struct Match { pub nodes: Vec, @@ -128,7 +94,8 @@ impl Match { pub fn is_full_recursive(&self) -> bool { self.nodes - .descendents() + .iter() + .flat_map(cst::Node::cursor) .all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none()) } } diff --git a/crates/solidity/outputs/npm/crate/src/generated/visitor.rs b/crates/solidity/outputs/npm/crate/src/generated/visitor.rs index 5aac41de28..2a8585df5d 100644 --- a/crates/solidity/outputs/npm/crate/src/generated/visitor.rs +++ b/crates/solidity/outputs/npm/crate/src/generated/visitor.rs @@ -1,85 +1,78 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. +use std::ops::ControlFlow; use std::rc::Rc; use super::{cst::*, cursor::Cursor}; -#[allow(unused_variables)] +/// A Visitor pattern for traversing the CST. +/// +/// The trait supports fallible iteration, i.e. the visitor can early return an error from the visit. pub trait Visitor { + /// Called when the [`Visitor`] enters a [`RuleNode`]. 
fn rule_enter( &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorEntryResponse::StepIn) + _node: &Rc, + _cursor: &Cursor, + ) -> Result, E> { + Ok(ControlFlow::Continue(Step::In)) } - fn rule_exit( - &mut self, - node: &Rc, - cursor: &Cursor, - ) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] exits a [`RuleNode`]. + fn rule_exit(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } - fn token(&mut self, node: &Rc, cursor: &Cursor) -> Result { - Ok(VisitorExitResponse::Continue) + /// Called when the [`Visitor`] enters a [`TokenNode`]. + fn token(&mut self, _node: &Rc, _cursor: &Cursor) -> Result, E> { + Ok(ControlFlow::Continue(())) } } -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorEntryResponse { - Quit, - StepIn, - StepOver, -} - -#[allow(dead_code)] -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum VisitorExitResponse { - Quit, - Continue, +/// Whether the [`Visitor`] should should enter the children of a [`RuleNode`] or not. +pub enum Step { + In, + Over, } -#[allow(dead_code)] impl Cursor { pub fn drive_visitor>( &mut self, visitor: &mut V, - ) -> Result { + ) -> Result, E> { if self.is_completed() { - return Ok(VisitorExitResponse::Continue); + return Ok(ControlFlow::Continue(())); } + loop { // Node clone is cheap because it's just an enum around an Rc match self.node() { Node::Rule(rule_node) => { match visitor.rule_enter(&rule_node, self)? { - VisitorEntryResponse::Quit => return Ok(VisitorExitResponse::Quit), - VisitorEntryResponse::StepIn => { + ControlFlow::Break(()) => return Ok(ControlFlow::Break(())), + ControlFlow::Continue(Step::In) => { if self.go_to_first_child() { self.drive_visitor(visitor)?; self.go_to_parent(); } } - VisitorEntryResponse::StepOver => {} + ControlFlow::Continue(Step::Over) => {} } - if visitor.rule_exit(&rule_node, self)? 
== VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.rule_exit(&rule_node, self)? == ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } Node::Token(token_node) => { - if visitor.token(&token_node, self)? == VisitorExitResponse::Quit { - return Ok(VisitorExitResponse::Quit); + if visitor.token(&token_node, self)? == ControlFlow::Break(()) { + return Ok(ControlFlow::Break(())); } } } + if !self.go_to_next_sibling() { - break; + return Ok(ControlFlow::Continue(())); } } - Ok(VisitorExitResponse::Continue) } } diff --git a/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs b/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs index 235199a6f4..f50f48c34f 100644 --- a/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs +++ b/crates/solidity/testing/utils/src/cst_snapshots/test_nodes.rs @@ -1,10 +1,10 @@ +use std::ops::ControlFlow; use std::rc::Rc; use anyhow::Result; use slang_solidity::{ cst::Node, cst::RuleNode, cst::TokenNode, cursor::Cursor, kinds::RuleKind, kinds::TokenKind, - text_index::TextRange, visitor::Visitor, visitor::VisitorEntryResponse, - visitor::VisitorExitResponse, + text_index::TextRange, visitor::Step, visitor::Visitor, }; #[derive(Debug)] @@ -29,21 +29,17 @@ impl Visitor<()> for TestNodeBuilder { &mut self, _node: &Rc, _cursor: &Cursor, - ) -> std::result::Result { + ) -> Result, ()> { self.stack.push(vec![]); - Ok(VisitorEntryResponse::StepIn) + Ok(ControlFlow::Continue(Step::In)) } - fn rule_exit( - &mut self, - node: &Rc, - cursor: &Cursor, - ) -> std::result::Result { + fn rule_exit(&mut self, node: &Rc, cursor: &Cursor) -> Result, ()> { let children = self.stack.pop().unwrap(); if (node.kind == RuleKind::LeadingTrivia) | (node.kind == RuleKind::TrailingTrivia) { if children.is_empty() { - return Ok(VisitorExitResponse::Continue); + return Ok(ControlFlow::Continue(())); } } @@ -54,14 +50,10 @@ impl Visitor<()> for TestNodeBuilder { }; 
self.stack.last_mut().unwrap().push(new_node); - Ok(VisitorExitResponse::Continue) + Ok(ControlFlow::Continue(())) } - fn token( - &mut self, - node: &Rc, - cursor: &Cursor, - ) -> std::result::Result { + fn token(&mut self, node: &Rc, cursor: &Cursor) -> Result, ()> { if !Self::is_whitespace(node) { let kind = if Self::is_comment(node) { TestNodeKind::Trivia(node.kind) @@ -77,7 +69,7 @@ impl Visitor<()> for TestNodeBuilder { self.stack.last_mut().unwrap().push(new_node); } - Ok(VisitorExitResponse::Continue) + Ok(ControlFlow::Continue(())) } } diff --git a/documentation/public/user-guide/cargo-crate/index.md b/documentation/public/user-guide/cargo-crate/index.md index a422b9b78c..a9a2138bff 100644 --- a/documentation/public/user-guide/cargo-crate/index.md +++ b/documentation/public/user-guide/cargo-crate/index.md @@ -57,10 +57,10 @@ The below example uses a cursor to collect the names of all contracts in a sourc ## Visitor API The `Visitor` trait allows callers to implement a visitor that will be called for each node in the tree. -The `VisitorEntryResponse` enum allows callers to control the traversal behavior. +The `std::ops::ControlFlow` enum coupled with the `Step` enum allows callers to control the traversal behavior. -For example, if the visitor is only interested in the top-level nodes, it can return `VisitorEntryResponse::StepOver` to skip the children of the current node. -If the visitor is interested in the children of the current node, it can return `VisitorEntryResponse::StepIn` to visit them. +For example, if the visitor is only interested in the top-level nodes, it can return `ControlFlow::Continue(Step::Over)` to skip the children of the current node. +If the visitor is interested in the children of the current node, it can return `ControlFlow::Continue(Step::In)` to visit them. The below example defines a visitor that collects the names of all contracts in a source file, and returns them as a `Vec`: