diff --git a/.changeset/few-taxis-retire.md b/.changeset/few-taxis-retire.md new file mode 100644 index 0000000000..7b924c5feb --- /dev/null +++ b/.changeset/few-taxis-retire.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Tree Query Language: queries now ignore trivia nodes. diff --git a/.changeset/gentle-shirts-deliver.md b/.changeset/gentle-shirts-deliver.md new file mode 100644 index 0000000000..77e5b8b740 --- /dev/null +++ b/.changeset/gentle-shirts-deliver.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Tree Query Language: remove the ellipsis query `...` operator making it implicit, add an adjacency operator `.`. diff --git a/crates/metaslang/cst/src/query/engine.rs b/crates/metaslang/cst/src/query/engine.rs index ff5f1284a1..995dfa2b89 100644 --- a/crates/metaslang/cst/src/query/engine.rs +++ b/crates/metaslang/cst/src/query/engine.rs @@ -9,7 +9,7 @@ use super::model::{ }; use crate::cst::NodeKind; use crate::query::CaptureQuantifier; -use crate::KindTypes; +use crate::{KindTypes, TerminalKind as _}; impl Cursor { pub fn query(self, queries: Vec>) -> QueryMatchIterator { @@ -46,6 +46,8 @@ impl Cursor { NodeSelector::EdgeLabelAndNodeText { .. } => false, }, + Node::::Terminal(terminal) if terminal.kind.is_trivia() => false, + Node::::Terminal(terminal) => match node_selector { NodeSelector::Anonymous => true, NodeSelector::NodeKind { node_kind } => { @@ -79,31 +81,67 @@ impl ASTNode { Self::Sequence(matcher) => matcher.children[0].can_match(cursor), Self::OneOrMore(matcher) => matcher.child.can_match(cursor), Self::Optional(_) => true, - Self::Ellipsis => true, + Self::Adjacency => true, } } - fn create_matcher(&self, cursor: Cursor) -> MatcherRef { + // The `require_explicit_match` parameter modifies the behaviour of this and + // later matchers. If this value is true, this and later matchers should not + // match sibling nodes implicitly. 
+ // Currently this only modifies the behaviour of the ellipsis matcher, which + // otherwise will attempt to consume any number of sibling nodes. + // In a sequence of matchers, this value is set to true by the ellipsis + // operator itself, to consume all available sibling nodes and prevent later + // ellipsis matchers from doing so. + // Conversely, it's set to false by the `NodeMatchMatcher`, both when recursing + // into its children and for later matchers after itself, as it handles an + // explicit match requested by the user. + // All other matchers should propagate the received value forward. + // + // The whole point of propagating this flag is to prevent a weird + // interaction between ellipsis operators working on the same set of sibling + // nodes. While two consecutive ellipsis operators should never happen, we + // have the `OptionalMatcher` which will not consume any nodes in the nil + // case. This means that `... [_]? ...` will effectively work (in one case) + // as `... ...`. If we allow both ellipsis operators to consume any number + // of nodes, for a sequence of N nodes we get N+1 identical query results + // when the operators take turns matching each prefix and complementary + // suffix of the list of nodes. By only allowing the first ellipsis operator + // to consume an arbitrary number of nodes, we reduce the returned matches + // to a single one. 
+ // + fn create_matcher(&self, cursor: Cursor, require_explicit_match: bool) -> MatcherRef { match self { - Self::Capture(matcher) => { - Box::new(CaptureMatcher::::new(Rc::clone(matcher), cursor)) - } + Self::Capture(matcher) => Box::new(CaptureMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), Self::NodeMatch(matcher) => { + // By definition this matcher matches nodes explicitly Box::new(NodeMatchMatcher::::new(Rc::clone(matcher), cursor)) } - Self::Sequence(matcher) => { - Box::new(SequenceMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Alternatives(matcher) => { - Box::new(AlternativesMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Optional(matcher) => { - Box::new(OptionalMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::OneOrMore(matcher) => { - Box::new(OneOrMoreMatcher::::new(Rc::clone(matcher), cursor)) - } - Self::Ellipsis => Box::new(EllipsisMatcher::::new(cursor)), + Self::Sequence(matcher) => Box::new(SequenceMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Alternatives(matcher) => Box::new(AlternativesMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Optional(matcher) => Box::new(OptionalMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::OneOrMore(matcher) => Box::new(OneOrMoreMatcher::::new( + Rc::clone(matcher), + cursor, + require_explicit_match, + )), + Self::Adjacency => Box::new(AdjacencyMatcher::::new(cursor, require_explicit_match)), } } } @@ -112,7 +150,7 @@ pub struct QueryMatch { pub queries: Rc>>, pub query_number: usize, pub root_cursor: Cursor, - // These correspond to the capture definitions in tne query + // These correspond to the capture definitions in the query pub captures: BTreeMap>>, } @@ -179,7 +217,8 @@ impl QueryMatchIterator { while self.query_number < self.queries.len() { let ast_node = &self.queries[self.query_number].ast_node; if 
ast_node.can_match(&self.cursor) { - self.matcher = Some(ast_node.create_matcher(self.cursor.clone())); + // The first matcher in the query should allow implicit matches + self.matcher = Some(ast_node.create_matcher(self.cursor.clone(), false)); return; }; self.query_number += 1; @@ -216,11 +255,28 @@ impl Iterator for QueryMatchIterator { } } +#[derive(Clone)] +struct MatcherResult { + // if cursor.is_completed() -> end of input + // if !cursor.is_completed() -> there is more input to go + cursor: Cursor, + + // Controls whether next matchers can match nodes implicitly. For matchers + // applied on a sequence of sibling nodes, this will be: + // - initially false, allowing the first found ellipsis matcher to consume + // an arbitrary number of nodes + // - true after the execution of an ellipsis, thus preventing later ellipsis + // from consuming nodes + // - propagated forward by other matchers, until + // - an actual `NodeMatcher` successfully matches a node, which then flips + // this value back to false + require_explicit_match: bool, +} + trait Matcher { // None -> failed to match, you must backtrack. 
DO NOT call again - // Some(cursor) if cursor.is_complete -> matched, end of input - // Some(cursor) if !cursor.is_complete -> matched, more input to go - fn next(&mut self) -> Option>; + // Some(result) -> matched, check result.cursor and pass require_explicit_match forward + fn next(&mut self) -> Option>; fn record_captures(&self, captures: &mut BTreeMap>>); } type MatcherRef = Box>; @@ -232,8 +288,14 @@ struct CaptureMatcher { } impl CaptureMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { - let child = matcher.child.create_matcher(cursor.clone()); + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + let child = matcher + .child + .create_matcher(cursor.clone(), require_explicit_match); Self { matcher, cursor, @@ -243,7 +305,7 @@ impl CaptureMatcher { } impl Matcher for CaptureMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { self.child.next() } @@ -275,7 +337,7 @@ impl NodeMatchMatcher { } impl Matcher for NodeMatchMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if self.cursor.is_completed() { return None; } @@ -293,25 +355,39 @@ impl Matcher for NodeMatchMatcher { if let Some(child) = self.matcher.child.as_ref() { let mut child_cursor = self.cursor.clone(); if !child_cursor.go_to_first_child() { + // We have child matchers, but no children. return None; } - self.child = Some(child.create_matcher(child_cursor)); + // Start traversing the children nodes allowing an ellipsis + // operator to match implicitly. + self.child = Some(child.create_matcher(child_cursor, false)); } else { + // We have no child matchers, we can return the result now. 
let mut return_cursor = self.cursor.clone(); return_cursor.irrevocably_go_to_next_sibling(); - return Some(return_cursor); + return Some(MatcherResult { + cursor: return_cursor, + require_explicit_match: false, + }); } } if let Some(child) = self.child.as_mut() { - while let Some(cursor) = child.as_mut().next() { + // Match our children with the child matcher repeatedly. + while let Some(MatcherResult { cursor, .. }) = child.as_mut().next() { if cursor.is_completed() { + // If match found and exhausted our children list, return + // the match *from our own cursor* let mut return_cursor = self.cursor.clone(); return_cursor.irrevocably_go_to_next_sibling(); - return Some(return_cursor); + return Some(MatcherResult { + cursor: return_cursor, + require_explicit_match: false, + }); } } + // No more matches from the child matcher, we will backtrack at this point. self.child = None; } @@ -325,43 +401,107 @@ impl Matcher for NodeMatchMatcher { } } +enum SequenceItem { + ChildMatcher(usize), + Ellipsis, +} + struct SequenceMatcher { matcher: Rc>, children: Vec>, cursor: Cursor, is_initialised: bool, + template: Vec, + require_explicit_match: bool, } -impl SequenceMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { +impl SequenceMatcher { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + // Produce a template of instructions to create the matchers for the + // sequence by inserting ellipsis matchers at the start, end, and in + // between each of the child matchers, unless we find an adjacency + // operator. If the sequence is adjacent (eg. option in alt or + // quantified group sequence) then we should not add matchers at the + // edges. 
+ let (mut template, last_adjacent) = matcher.children.iter().enumerate().fold( + (Vec::new(), matcher.adjacent), + |(mut acc, last_adjacent), (index, child)| { + if matches!(child, ASTNode::Adjacency) { + if last_adjacent { + unreachable!("Found two consecutive adjacency operators") + } + acc.push(SequenceItem::ChildMatcher(index)); + (acc, true) + } else { + if !last_adjacent { + acc.push(SequenceItem::Ellipsis); + } + acc.push(SequenceItem::ChildMatcher(index)); + (acc, false) + } + }, + ); + if !last_adjacent && !matcher.adjacent { + template.push(SequenceItem::Ellipsis); + } Self { matcher, children: vec![], cursor, is_initialised: false, + template, + require_explicit_match, + } + } + + fn create_matcher( + &self, + index: usize, + cursor: Cursor, + require_explicit_match: bool, + ) -> MatcherRef { + let item = &self.template[index]; + match item { + SequenceItem::Ellipsis => { + Box::new(EllipsisMatcher::new(cursor, require_explicit_match)) + } + SequenceItem::ChildMatcher(index) => { + self.matcher.children[*index].create_matcher(cursor, require_explicit_match) + } } } } impl Matcher for SequenceMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if !self.is_initialised { self.is_initialised = true; let child_cursor = self.cursor.clone(); - let child = self.matcher.children[0].create_matcher(child_cursor); + let child = self.create_matcher(0, child_cursor, self.require_explicit_match); self.children.push(child); } while !self.children.is_empty() { - if let Some(child_cursor) = self.children.last_mut().unwrap().next() { - if self.children.len() == self.matcher.children.len() { - return Some(child_cursor); + if let Some(child_matcher_result) = self.children.last_mut().unwrap().next() { + if self.children.len() == self.template.len() { + // Last child, return its result as our own + return Some(child_matcher_result); } - - let child = self.matcher.children[self.children.len()].create_matcher(child_cursor); + // Create the next child 
matcher propagating the + // `require_explicit_match` flag forward. + let child = self.create_matcher( + self.children.len(), + child_matcher_result.cursor, + child_matcher_result.require_explicit_match, + ); self.children.push(child); } else { + // Backtrack self.children.pop(); } } @@ -381,26 +521,35 @@ struct AlternativesMatcher { next_child_number: usize, child: Option>, cursor: Cursor, + require_explicit_match: bool, } impl AlternativesMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { Self { matcher, next_child_number: 0, child: None, cursor, + require_explicit_match, } } } impl Matcher for AlternativesMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { if self.child.is_none() { + // Create the next available child matcher forwarding the + // `require_explicit_match` flag, or give up if we have no more match self.matcher.children.get(self.next_child_number) { Some(child) => { - let child = child.create_matcher(self.cursor.clone()); + let child = + child.create_matcher(self.cursor.clone(), self.require_explicit_match); self.child = Some(child); self.next_child_number += 1; } @@ -409,7 +558,7 @@ impl Matcher for AlternativesMatcher { } match self.child.as_mut().unwrap().next() { - Some(cursor) => return Some(cursor), + Some(child_matcher_result) => return Some(child_matcher_result), None => self.child = None, } } @@ -425,37 +574,52 @@ struct OptionalMatcher { child: Option>, cursor: Cursor, have_nonempty_match: bool, + require_explicit_match: bool, } impl OptionalMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { Self { matcher, child: None, cursor, have_nonempty_match: false, + require_explicit_match, } } } impl Matcher for OptionalMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { if let Some(child) = 
self.child.as_mut() { - match child.next() { - r#match @ Some(_) => { - self.have_nonempty_match = true; - r#match - } - None => { - self.child = None; - None - } + // Second visit, we have a child matcher created + if let Some(child_matcher_result) = child.next() { + self.have_nonempty_match = true; + Some(child_matcher_result) + } else { + self.child = None; + None } } else { + // First visit, we don't have a child matcher yet, so create it + // forwarding our `require_explicit_match` flag let child_cursor = self.cursor.clone(); - let child = self.matcher.child.create_matcher(child_cursor); + let child = self + .matcher + .child + .create_matcher(child_cursor, self.require_explicit_match); self.child = Some(child); - Some(self.cursor.clone()) + + // Return a match result for the empty case, forwarding the + // `require_explicit_match` flag. + Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: self.require_explicit_match, + }) } } @@ -471,36 +635,43 @@ impl Matcher for OptionalMatcher { struct OneOrMoreMatcher { matcher: Rc>, children: Vec>, - cursor_for_next_repetition: Option>, + result_for_next_repetition: Option>, } impl OneOrMoreMatcher { - fn new(matcher: Rc>, cursor: Cursor) -> Self { - let cursor_for_next_repetition = Some(cursor); + fn new( + matcher: Rc>, + cursor: Cursor, + require_explicit_match: bool, + ) -> Self { + let result_for_next_repetition = Some(MatcherResult { + cursor, + require_explicit_match, + }); Self { matcher, children: vec![], - cursor_for_next_repetition, + result_for_next_repetition, } } } impl Matcher for OneOrMoreMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { loop { - if let Some(cursor_for_next_repetition) = self.cursor_for_next_repetition.take() { + if let Some(last_result) = self.result_for_next_repetition.take() { let next_child = self .matcher .child - .create_matcher(cursor_for_next_repetition); + .create_matcher(last_result.cursor, 
last_result.require_explicit_match); self.children.push(next_child); } else { let tail = self.children.last_mut().unwrap(); - if let Some(cursor) = tail.next() { - if !cursor.is_completed() { - self.cursor_for_next_repetition = Some(cursor.clone()); + if let Some(child_matcher_result) = tail.next() { + if !child_matcher_result.cursor.is_completed() { + self.result_for_next_repetition = Some(child_matcher_result.clone()); } - return Some(cursor); + return Some(child_matcher_result); } self.children.pop(); if self.children.is_empty() { @@ -517,29 +688,50 @@ impl Matcher for OneOrMoreMatcher { } } +/// Matches any number of sibling nodes and is used in between other matchers +/// when matching sequences, unless an explicit adjacency operator is found. +/// If `require_explicit_match` is true, then this matcher can only return a +/// result for the empty case. This usually means that in the same sequence of +/// siblings we found a previous ellipsis matcher which will be able to consume +/// an arbitrary number of nodes. Then, the value is false if this is the first +/// `EllipsisMatcher` in a sibling list, or there was an explicit match (by a +/// `NodeMatcher`) in a previous matcher of the sequence. 
struct EllipsisMatcher { cursor: Cursor, has_returned_initial_empty_value: bool, + require_explicit_match: bool, } impl EllipsisMatcher { - fn new(cursor: Cursor) -> Self { + fn new(cursor: Cursor, require_explicit_match: bool) -> Self { Self { cursor, has_returned_initial_empty_value: false, + require_explicit_match, } } } impl Matcher for EllipsisMatcher { - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { + // First visit, we always return a match for empty case if !self.has_returned_initial_empty_value { self.has_returned_initial_empty_value = true; - return Some(self.cursor.clone()); + // We need later matchers to avoid consuming nodes + return Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: true, + }); } - if self.cursor.irrevocably_go_to_next_sibling() { - return Some(self.cursor.clone()); + // Subsequent visits: we only consume nodes if an explicit match is not + // required, ie. if this is the *first* ellipsis operator in a sibling + // sequence or there was an explicit match before us. 
+ if !self.require_explicit_match && self.cursor.irrevocably_go_to_next_sibling() { + return Some(MatcherResult { + cursor: self.cursor.clone(), + require_explicit_match: true, + }); } None @@ -547,3 +739,36 @@ impl Matcher for EllipsisMatcher { fn record_captures(&self, _: &mut BTreeMap>>) {} } + +/// Greedily consumes available trivia nodes only +struct AdjacencyMatcher { + cursor: Option>, + require_explicit_match: bool, +} + +impl AdjacencyMatcher { + fn new(cursor: Cursor, require_explicit_match: bool) -> Self { + Self { + cursor: Some(cursor), + require_explicit_match, + } + } +} + +impl Matcher for AdjacencyMatcher { + fn next(&mut self) -> Option> { + if let Some(mut cursor) = self.cursor.take() { + while !cursor.is_completed() && cursor.node().is_trivia() { + cursor.irrevocably_go_to_next_sibling(); + } + Some(MatcherResult { + cursor, + require_explicit_match: self.require_explicit_match, + }) + } else { + None + } + } + + fn record_captures(&self, _: &mut BTreeMap>>) {} +} diff --git a/crates/metaslang/cst/src/query/model.rs b/crates/metaslang/cst/src/query/model.rs index a2f3a8dd1c..4a61405d9a 100644 --- a/crates/metaslang/cst/src/query/model.rs +++ b/crates/metaslang/cst/src/query/model.rs @@ -81,7 +81,7 @@ impl Query { capture_quantifiers, )?; } - ASTNode::Ellipsis => {} + ASTNode::Adjacency => {} } Ok(()) } @@ -113,7 +113,7 @@ pub enum ASTNode { Alternatives(Rc>), Sequence(Rc>), OneOrMore(Rc>), - Ellipsis, + Adjacency, } impl ASTNode { @@ -167,7 +167,7 @@ impl fmt::Display for ASTNode { Self::OneOrMore(one_or_more) => { write!(f, "({})+", one_or_more.child) } - Self::Ellipsis => write!(f, "..."), + Self::Adjacency => write!(f, "."), } } } @@ -256,6 +256,10 @@ pub struct NodeMatchASTNode { #[derive(Debug)] pub struct SequenceASTNode { pub children: Vec>, + // By default sequences can match any number of nodes at the beginning and + // end of it. Setting this value to true prevents it and instead forces + // strict adjacency at the edges. 
+ pub adjacent: bool, } #[derive(Debug)] diff --git a/crates/metaslang/cst/src/query/parser.rs b/crates/metaslang/cst/src/query/parser.rs index a43ca8f205..6f490ac9a2 100644 --- a/crates/metaslang/cst/src/query/parser.rs +++ b/crates/metaslang/cst/src/query/parser.rs @@ -2,10 +2,10 @@ use std::fmt; use std::rc::Rc; use nom::branch::alt; -use nom::bytes::complete::{is_not, tag, take_while, take_while1, take_while_m_n}; -use nom::character::complete::{char, multispace0, multispace1, satisfy}; +use nom::bytes::complete::{is_not, take_while, take_while1, take_while_m_n}; +use nom::character::complete::{char, multispace0, multispace1, none_of, satisfy}; use nom::combinator::{ - all_consuming, cut, map_opt, map_res, opt, peek, recognize, success, value, verify, + all_consuming, cut, eof, map_opt, map_res, opt, peek, recognize, success, value, verify, }; use nom::error::{ErrorKind, FromExternalError, ParseError}; use nom::multi::{fold_many0, many1, separated_list1}; @@ -19,7 +19,7 @@ use super::model::{ }; use crate::cst::NodeKind; use crate::text_index::TextIndex; -use crate::{AbstractKind as _, KindTypes}; +use crate::{AbstractKind as _, KindTypes, TerminalKind as _}; // ---------------------------------------------------------------------------- // Parse errors @@ -47,10 +47,21 @@ enum QueryParserErrorKind { Syntax(QuerySyntaxError), } +#[derive(Clone)] enum QuerySyntaxError { EdgeLabel(String), NodeKind(String), EscapedUnicode, + DeprecatedEllipsis, + ForbiddenTriviaKind, +} + +impl QueryParserError { + fn from_query_syntax_error(input: I, error: QuerySyntaxError) -> Self { + QueryParserError { + errors: vec![(input, QueryParserErrorKind::Syntax(error))], + } + } } impl ParseError for QueryParserError { @@ -74,9 +85,7 @@ impl ParseError for QueryParserError { impl FromExternalError for QueryParserError { fn from_external_error(input: I, _kind: ErrorKind, e: QuerySyntaxError) -> Self { - QueryParserError { - errors: vec![(input, QueryParserErrorKind::Syntax(e))], - } + 
Self::from_query_syntax_error(input, e) } } @@ -88,6 +97,12 @@ impl fmt::Display for QuerySyntaxError { QuerySyntaxError::EscapedUnicode => { write!(f, "Invalid escaped Unicode character") } + QuerySyntaxError::DeprecatedEllipsis => { + write!(f, "The ellipsis `...` operator is deprecated, and replaced with a new adjacency `.` operator. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/") + } + QuerySyntaxError::ForbiddenTriviaKind => { + write!(f, "Matching trivia nodes directly is forbidden. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/") + } } } } @@ -149,7 +164,7 @@ fn compute_row_and_column(target: &str, input: &str) -> TextIndex { fn parse_matcher_alternatives( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { - separated_list1(token('|'), parse_matcher_sequence::) + separated_list1(token('|'), parse_matcher_alt_sequence::) .map(|mut children| { if children.len() == 1 { children.pop().unwrap() @@ -163,38 +178,76 @@ fn parse_matcher_alternatives( fn parse_matcher_sequence( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { - many1(parse_quantified_matcher::) - .map(|mut children| { - if children.len() == 1 { - children.pop().unwrap() - } else { - ASTNode::Sequence(Rc::new(SequenceASTNode { children })) - } - }) - .parse(i) + verify( + many1(parse_sequence_item::), + |children: &[ASTNode]| { + // It doesn't make sense for a sequence to be a single adjacency operator + children.len() > 1 || !matches!(children[0], ASTNode::Adjacency) + }, + ) + .map(|children| { + ASTNode::Sequence(Rc::new(SequenceASTNode { + children, + adjacent: false, + })) + }) + .parse(i) } -fn parse_quantified_matcher( +fn parse_matcher_alt_sequence( i: &str, ) -> IResult<&str, ASTNode, QueryParserError<&str>> { + verify( + many1(parse_sequence_item::), + |children: &[ASTNode]| { + // Alternative 
sequences cannot start or end with an adjacency + // operator, because it is implicitly adjacent to the previous and + // next matchers + !matches!(children[0], ASTNode::Adjacency) + && !matches!(children[children.len() - 1], ASTNode::Adjacency) + }, + ) + .map(|mut children| { + if children.len() == 1 { + // Alternative sequences of length 1 can be simplified to the child pattern + children.pop().unwrap() + } else { + ASTNode::Sequence(Rc::new(SequenceASTNode { + children, + adjacent: true, + })) + } + }) + .parse(i) +} + +fn parse_sequence_item(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { alt(( - ellipsis_token.map(|_| ASTNode::Ellipsis), // Cannot be quantified - pair( - parse_bound_matcher, - parse_trailing_quantifier, // admits epsilon - ) - .map(|(child, quantifier)| match quantifier { - CaptureQuantifier::ZeroOrOne => ASTNode::Optional(Rc::new(OptionalASTNode { child })), - CaptureQuantifier::ZeroOrMore => ASTNode::Optional(Rc::new(OptionalASTNode { - child: ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), - })), - CaptureQuantifier::OneOrMore => ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), - CaptureQuantifier::One => child, - }), + ellipsis_token, + adjacency_operator::, + parse_quantified_matcher::, )) .parse(i) } +fn parse_quantified_matcher( + i: &str, +) -> IResult<&str, ASTNode, QueryParserError<&str>> { + pair( + parse_bound_matcher, + parse_trailing_quantifier, // admits epsilon + ) + .map(|(child, quantifier)| match quantifier { + CaptureQuantifier::ZeroOrOne => ASTNode::Optional(Rc::new(OptionalASTNode { child })), + CaptureQuantifier::ZeroOrMore => ASTNode::Optional(Rc::new(OptionalASTNode { + child: ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), + })), + CaptureQuantifier::OneOrMore => ASTNode::OneOrMore(Rc::new(OneOrMoreASTNode { child })), + CaptureQuantifier::One => child, + }) + .parse(i) +} + fn parse_bound_matcher(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { pair( 
opt(capture_name_token), @@ -307,7 +360,10 @@ fn anonymous_selector( terminated( terminated( char('_'), - peek(satisfy(|c| c != '_' && !c.is_alphanumeric())), + peek( + eof.map(|_| ' ') + .or(satisfy(|c| c != '_' && !c.is_alphanumeric())), + ), ), multispace0, ) @@ -319,14 +375,21 @@ fn kind_token(i: &str) -> IResult<&str, NodeKind, QueryParserEr terminated( preceded( peek(satisfy(|c| c.is_alphabetic() || c == '_')), - cut(map_res(raw_identifier, |id| { - T::TerminalKind::try_from_str(id.as_str()) - .map(NodeKind::Terminal) - .or_else(|_| { - T::NonterminalKind::try_from_str(id.as_str()).map(NodeKind::Nonterminal) - }) - .or(Err(QuerySyntaxError::NodeKind(id))) - })), + cut(map_res( + raw_identifier, + |id| match T::TerminalKind::try_from_str(id.as_str()) { + Ok(kind) => { + if kind.is_trivia() { + Err(QuerySyntaxError::ForbiddenTriviaKind) + } else { + Ok(NodeKind::Terminal(kind)) + } + } + Err(_) => T::NonterminalKind::try_from_str(id.as_str()) + .map(NodeKind::Nonterminal) + .or(Err(QuerySyntaxError::NodeKind(id))), + }, + )), ), multispace0, ) @@ -413,10 +476,43 @@ fn text_token(i: &str) -> IResult<&str, String, QueryParserError<&str>> { .parse(i) } -fn ellipsis_token(i: &str) -> IResult<&str, &str, QueryParserError<&str>> { - terminated(tag("..."), multispace0).parse(i) -} - fn token<'input>(c: char) -> impl Parser<&'input str, char, QueryParserError<&'input str>> { terminated(char(c), multispace0) } + +fn adjacency_operator(i: &str) -> IResult<&str, ASTNode, QueryParserError<&str>> { + // An adjacency operator is a single '.' character, and cannot be followed + // by another adjacency operator + pair(token('.'), cut(peek(none_of(". 
\t\r\n")))) + .map(|_| ASTNode::Adjacency) + .parse(i) +} + +fn recognize_as_failure( + error: QuerySyntaxError, + mut parser: F, +) -> impl FnMut(I) -> IResult> +where + F: nom::Parser>, +{ + use nom::Err::Failure; + move |input: I| { + let i = input.clone(); + match parser.parse(i) { + Ok((_, _)) => Err(Failure(QueryParserError::from_query_syntax_error( + input, + error.clone(), + ))), + Err(e) => Err(e), + } + } +} + +fn ellipsis_token(i: &str) -> IResult<&str, O, QueryParserError<&str>> { + use nom::bytes::complete::tag; + recognize_as_failure( + QuerySyntaxError::DeprecatedEllipsis, + terminated(tag("..."), multispace0), + ) + .parse(i) +} diff --git a/crates/solidity/inputs/language/bindings/rules.msgb b/crates/solidity/inputs/language/bindings/rules.msgb index 524d8c64c3..ad683dc7c9 100644 --- a/crates/solidity/inputs/language/bindings/rules.msgb +++ b/crates/solidity/inputs/language/bindings/rules.msgb @@ -94,8 +94,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Top-level definitions... -@source_unit [SourceUnit ... [SourceUnitMembers - ... +@source_unit [SourceUnit [SourceUnitMembers [SourceUnitMember @unit_member ( [ContractDefinition] | [InterfaceDefinition] @@ -108,8 +107,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i | [UserDefinedValueTypeDefinition] | [EventDefinition] )] - ... -] ...] { +]] { edge @unit_member.lexical_scope -> @source_unit.lexical_scope ;; ... are available in the file's lexical scope @@ -124,7 +122,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Named definitions (contracts, functions, libraries, etc.) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@contract [ContractDefinition ... @name name: [Identifier] ...] 
{ +@contract [ContractDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -147,7 +145,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @contract.type_members } -@interface [InterfaceDefinition ... @name name: [Identifier] ...] { +@interface [InterfaceDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -170,7 +168,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @interface.type_members } -@library [LibraryDefinition ... @name name: [Identifier] ...] { +@library [LibraryDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -183,7 +181,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @library.members } -@function [FunctionDefinition ... name: [FunctionName ... @name [Identifier] ...] ...] { +@function [FunctionDefinition name: [FunctionName @name [Identifier]]] { node def attr (def) node_definition = @name @@ -221,16 +219,16 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The identifier path constructs a path of nodes connected from right to left -[IdentifierPath ... @name [Identifier] ...] { +[IdentifierPath @name [Identifier]] { node @name.ref attr (@name.ref) node_reference = @name } -@id_path [IdentifierPath ... @name [Identifier] (trailing_trivia:[_])*] { +@id_path [IdentifierPath @name [Identifier] .] { edge @id_path.right -> @name.ref } -[IdentifierPath ... @left_name [Identifier] [Period] @right_name [Identifier] ...] { +[IdentifierPath @left_name [Identifier] . [Period] . @right_name [Identifier]] { node member attr (member) push_symbol = "." @@ -238,7 +236,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @left_name.ref } -@id_path [IdentifierPath (leading_trivia:[_])* @name [Identifier] ...] 
{ +@id_path [IdentifierPath . @name [Identifier]] { edge @name.ref -> @id_path.left } @@ -252,11 +250,11 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i node @param.def } -@param [Parameter ... @type_name [TypeName] ...] { +@param [Parameter @type_name [TypeName]] { edge @type_name.type_ref -> @param.lexical_scope } -@param [Parameter ... @type_name [TypeName] ... @name [Identifier]] { +@param [Parameter @type_name [TypeName] @name [Identifier]] { node def attr (def) node_definition = @name @@ -269,11 +267,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @type_name.output } -@function [FunctionDefinition ... parameters: [ParametersDeclaration - ... - [Parameters ... @param item: [Parameter] ...] - ... -] ...] { +@function [FunctionDefinition parameters: [ParametersDeclaration + [Parameters @param item: [Parameter]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Input parameters are available in the function scope @@ -281,11 +277,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i attr (@function.lexical_scope -> @param.def) precedence = 1 } -@function [FunctionDefinition ... returns: [ReturnsDeclaration - ... - [ParametersDeclaration ... [Parameters ... @param item: [Parameter] ...] ...] - ... -] ...] { +@function [FunctionDefinition returns: [ReturnsDeclaration + [ParametersDeclaration [Parameters @param item: [Parameter]]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Return parameters are available in the function scope @@ -299,29 +293,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. free-functions (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] 
{ +]] { edge @function.lexical_scope -> @contract.lexical_scope edge @contract.members -> @function.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @interface.lexical_scope edge @interface.members -> @function.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @library.lexical_scope edge @library.members -> @function.def } @@ -351,20 +339,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The first statement in a block -@block [Block ... [Statements @stmt [Statement] ...] ...] { +@block [Block [Statements . @stmt [Statement]]] { if (version-matches ">= 0.5.0") { edge @stmt.lexical_scope -> @block.lexical_scope } } ;; Two consecutive statements -[Statements ... @left_stmt [Statement] @right_stmt [Statement] ...] { +[Statements @left_stmt [Statement] . @right_stmt [Statement]] { if (version-matches ">= 0.5.0") { edge @right_stmt.lexical_scope -> @left_stmt.lexical_scope } } -@block [Block ... [Statements ... @stmt [Statement]...] ...] { +@block [Block [Statements @stmt [Statement]]] { ;; Hoist statement definitions for Solidity < 0.5.0 if (version-matches "< 0.5.0") { ;; definitions are carried over to the block @@ -390,7 +378,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Connect the function body's block lexical scope to the function -@function [FunctionDefinition ... [FunctionBody @block [Block]] ...] 
{ +@function [FunctionDefinition [FunctionBody @block [Block]]] { edge @block.lexical_scope -> @function.lexical_scope } @@ -400,11 +388,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @stmt [Statement [VariableDeclarationStatement - ... [VariableDeclarationType @var_type [TypeName]] - ... @name name: [Identifier] - ... ]] { node def attr (def) node_definition = @name @@ -419,26 +404,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @var_type.output } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... - [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember ... @name name: [Identifier]]]] - ... -] ...]] { +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements + [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name edge @stmt.defs -> def } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements [TupleDeconstructionElement [TupleMember variant: [TypedTupleMember - ... @member_type type_name: [TypeName] - ... - @name name: [Identifier]]]] - ... -] ...]] { + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name @@ -463,11 +445,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @state_var [StateVariableDefinition - ... @type_name type_name: [TypeName] - ... @name name: [Identifier] - ... ] { node def attr (def) node_definition = @name @@ -485,11 +464,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. Even though the grammar allows it, state variables can only be declared ;; inside contracts, and not interfaces or libraries. 
So, we will only bind ;; contract state variables. -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @state_var variant: [StateVariableDefinition]] - ... -] ...] { +]] { edge @state_var.lexical_scope -> @contract.lexical_scope edge @contract.lexical_scope -> @state_var.def } @@ -499,7 +476,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Enum definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@enum [EnumDefinition ... @name name: [Identifier] ...] { +@enum [EnumDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -513,9 +490,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @enum [EnumDefinition - ... - members: [EnumMembers ... @item [Identifier] ...] - ... + members: [EnumMembers @item [Identifier]] ] { node def attr (def) node_definition = @item @@ -527,27 +502,21 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @contract.type_members -> @enum.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @interface.type_members -> @enum.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] 
{ +]] { edge @library.members -> @enum.def } @@ -556,7 +525,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Structure definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@struct [StructDefinition ... @name name: [Identifier] ...] { +@struct [StructDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -573,16 +542,14 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @struct.members } -@struct [StructDefinition ... [StructMembers ... @member item: [StructMember] ...] ...] { +@struct [StructDefinition [StructMembers @member item: [StructMember]]] { node @member.lexical_scope edge @member.lexical_scope -> @struct.lexical_scope } -@struct [StructDefinition ... [StructMembers - ... - @member item: [StructMember ... @type_name [TypeName] ... @name name: [Identifier] ...] - ... -] ...] { +@struct [StructDefinition [StructMembers + @member item: [StructMember @type_name [TypeName] @name name: [Identifier]] +]] { node def attr (def) node_definition = @name @@ -601,29 +568,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @contract.lexical_scope edge @contract.type_members -> @struct.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @interface.lexical_scope edge @interface.type_members -> @struct.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... 
+@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @library.lexical_scope edge @library.members -> @struct.def } @@ -640,38 +601,32 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; General case for nested expressions -@expr [Expression ... variant: [_ ... @child [Expression] ...] ...] { +@expr [Expression variant: [_ @child [Expression]]] { edge @child.lexical_scope -> @expr.lexical_scope } ;; Expressions as statements -@stmt [Statement ... variant: [_ ... @expr [Expression] ...] ...] { +@stmt [Statement variant: [_ @expr [Expression]]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for variable declarations -@stmt [Statement ... variant: [VariableDeclarationStatement - ... - value: [VariableDeclarationValue ... @expr [Expression] ...] - ... -] ...] { +@stmt [Statement variant: [VariableDeclarationStatement + value: [VariableDeclarationValue @expr [Expression]] +]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for state variable declarations @state_var [StateVariableDefinition - ... - value: [StateVariableDefinitionValue ... @expr [Expression]] - ... + value: [StateVariableDefinitionValue @expr [Expression]] ] { edge @expr.lexical_scope -> @state_var.lexical_scope } ;; Tuple expressions @tuple_expr [Expression [TupleExpression - ... - items: [TupleValues ... [TupleValue @expr [Expression]] ...] - ... + items: [TupleValues [TupleValue @expr [Expression]]] ]] { edge @expr.lexical_scope -> @tuple_expr.lexical_scope } @@ -679,7 +634,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Identifier expressions -@expr [Expression ... 
@name variant: [Identifier]] { +@expr [Expression @name variant: [Identifier]] { node ref attr (ref) node_reference = @name @@ -691,13 +646,10 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Member access expressions ;; TODO: implement variant for `.address` member -@expr [Expression ... [MemberAccessExpression - ... +@expr [Expression [MemberAccessExpression @operand operand: [Expression] - ... @name member: [Identifier] - ... -...]] { +]] { node ref attr (ref) node_reference = @name @@ -720,14 +672,12 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [PositionalArgumentsDeclaration - ... - [PositionalArguments ... @argument [Expression] ...] - ... + [PositionalArguments @argument [Expression]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@named_arg [NamedArgument ... @name [Identifier] [Colon] @value [Expression]] { +@named_arg [NamedArgument @name [Identifier] [Colon] @value [Expression]] { node @named_arg.lexical_scope edge @value.lexical_scope -> @named_arg.lexical_scope @@ -739,22 +689,18 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [NamedArgumentsDeclaration - ... - [NamedArgumentGroup ... [NamedArguments ... @argument [NamedArgument] ...] ...] - ... + [NamedArgumentGroup [NamedArguments @argument [NamedArgument]]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@funcall [Expression [FunctionCallExpression ... @args [ArgumentsDeclaration]]] { +@funcall [Expression [FunctionCallExpression @args [ArgumentsDeclaration]]] { edge @args.lexical_scope -> @funcall.lexical_scope } ;;; Type expressions -@type_expr [Expression [TypeExpression ... 
@type [TypeName] ...]] { +@type_expr [Expression [TypeExpression @type [TypeName]]] { edge @type.type_ref -> @type_expr.lexical_scope } - - diff --git a/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs b/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs index 0c47769020..28e97320f8 100644 --- a/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs +++ b/crates/solidity/outputs/cargo/slang_solidity/src/generated/bindings/generated/binding_rules.rs @@ -99,8 +99,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Top-level definitions... -@source_unit [SourceUnit ... [SourceUnitMembers - ... +@source_unit [SourceUnit [SourceUnitMembers [SourceUnitMember @unit_member ( [ContractDefinition] | [InterfaceDefinition] @@ -113,8 +112,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i | [UserDefinedValueTypeDefinition] | [EventDefinition] )] - ... -] ...] { +]] { edge @unit_member.lexical_scope -> @source_unit.lexical_scope ;; ... are available in the file's lexical scope @@ -129,7 +127,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Named definitions (contracts, functions, libraries, etc.) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@contract [ContractDefinition ... @name name: [Identifier] ...] { +@contract [ContractDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -152,7 +150,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @contract.type_members } -@interface [InterfaceDefinition ... @name name: [Identifier] ...] 
{ +@interface [InterfaceDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -175,7 +173,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge type_member -> @interface.type_members } -@library [LibraryDefinition ... @name name: [Identifier] ...] { +@library [LibraryDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -188,7 +186,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @library.members } -@function [FunctionDefinition ... name: [FunctionName ... @name [Identifier] ...] ...] { +@function [FunctionDefinition name: [FunctionName @name [Identifier]]] { node def attr (def) node_definition = @name @@ -226,16 +224,16 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The identifier path constructs a path of nodes connected from right to left -[IdentifierPath ... @name [Identifier] ...] { +[IdentifierPath @name [Identifier]] { node @name.ref attr (@name.ref) node_reference = @name } -@id_path [IdentifierPath ... @name [Identifier] (trailing_trivia:[_])*] { +@id_path [IdentifierPath @name [Identifier] .] { edge @id_path.right -> @name.ref } -[IdentifierPath ... @left_name [Identifier] [Period] @right_name [Identifier] ...] { +[IdentifierPath @left_name [Identifier] . [Period] . @right_name [Identifier]] { node member attr (member) push_symbol = "." @@ -243,7 +241,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @left_name.ref } -@id_path [IdentifierPath (leading_trivia:[_])* @name [Identifier] ...] { +@id_path [IdentifierPath . @name [Identifier]] { edge @name.ref -> @id_path.left } @@ -257,11 +255,11 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i node @param.def } -@param [Parameter ... @type_name [TypeName] ...] 
{ +@param [Parameter @type_name [TypeName]] { edge @type_name.type_ref -> @param.lexical_scope } -@param [Parameter ... @type_name [TypeName] ... @name [Identifier]] { +@param [Parameter @type_name [TypeName] @name [Identifier]] { node def attr (def) node_definition = @name @@ -274,11 +272,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @type_name.output } -@function [FunctionDefinition ... parameters: [ParametersDeclaration - ... - [Parameters ... @param item: [Parameter] ...] - ... -] ...] { +@function [FunctionDefinition parameters: [ParametersDeclaration + [Parameters @param item: [Parameter]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Input parameters are available in the function scope @@ -286,11 +282,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i attr (@function.lexical_scope -> @param.def) precedence = 1 } -@function [FunctionDefinition ... returns: [ReturnsDeclaration - ... - [ParametersDeclaration ... [Parameters ... @param item: [Parameter] ...] ...] - ... -] ...] { +@function [FunctionDefinition returns: [ReturnsDeclaration + [ParametersDeclaration [Parameters @param item: [Parameter]]] +]] { edge @param.lexical_scope -> @function.lexical_scope ;; Return parameters are available in the function scope @@ -304,29 +298,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. free-functions (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @contract.lexical_scope edge @contract.members -> @function.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... 
+@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @interface.lexical_scope edge @interface.members -> @function.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @function variant: [FunctionDefinition]] - ... -] ...] { +]] { edge @function.lexical_scope -> @library.lexical_scope edge @library.members -> @function.def } @@ -356,20 +344,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; The first statement in a block -@block [Block ... [Statements @stmt [Statement] ...] ...] { +@block [Block [Statements . @stmt [Statement]]] { if (version-matches ">= 0.5.0") { edge @stmt.lexical_scope -> @block.lexical_scope } } ;; Two consecutive statements -[Statements ... @left_stmt [Statement] @right_stmt [Statement] ...] { +[Statements @left_stmt [Statement] . @right_stmt [Statement]] { if (version-matches ">= 0.5.0") { edge @right_stmt.lexical_scope -> @left_stmt.lexical_scope } } -@block [Block ... [Statements ... @stmt [Statement]...] ...] { +@block [Block [Statements @stmt [Statement]]] { ;; Hoist statement definitions for Solidity < 0.5.0 if (version-matches "< 0.5.0") { ;; definitions are carried over to the block @@ -395,7 +383,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; Connect the function body's block lexical scope to the function -@function [FunctionDefinition ... [FunctionBody @block [Block]] ...] { +@function [FunctionDefinition [FunctionBody @block [Block]]] { edge @block.lexical_scope -> @function.lexical_scope } @@ -405,11 +393,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @stmt [Statement [VariableDeclarationStatement - ... 
[VariableDeclarationType @var_type [TypeName]] - ... @name name: [Identifier] - ... ]] { node def attr (def) node_definition = @name @@ -424,26 +409,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge typeof -> @var_type.output } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... - [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember ... @name name: [Identifier]]]] - ... -] ...]] { +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements + [TupleDeconstructionElement [TupleMember variant: [UntypedTupleMember + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name edge @stmt.defs -> def } -@stmt [Statement [TupleDeconstructionStatement ... [TupleDeconstructionElements - ... +@stmt [Statement [TupleDeconstructionStatement [TupleDeconstructionElements [TupleDeconstructionElement [TupleMember variant: [TypedTupleMember - ... @member_type type_name: [TypeName] - ... - @name name: [Identifier]]]] - ... -] ...]] { + @name name: [Identifier] + ]]] +]]] { node def attr (def) node_definition = @name @@ -468,11 +450,8 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @state_var [StateVariableDefinition - ... @type_name type_name: [TypeName] - ... @name name: [Identifier] - ... ] { node def attr (def) node_definition = @name @@ -490,11 +469,9 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. Even though the grammar allows it, state variables can only be declared ;; inside contracts, and not interfaces or libraries. So, we will only bind ;; contract state variables. -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @state_var variant: [StateVariableDefinition]] - ... -] ...] 
{ +]] { edge @state_var.lexical_scope -> @contract.lexical_scope edge @contract.lexical_scope -> @state_var.def } @@ -504,7 +481,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Enum definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@enum [EnumDefinition ... @name name: [Identifier] ...] { +@enum [EnumDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -518,9 +495,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @enum [EnumDefinition - ... - members: [EnumMembers ... @item [Identifier] ...] - ... + members: [EnumMembers @item [Identifier]] ] { node def attr (def) node_definition = @item @@ -532,27 +507,21 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @contract.type_members -> @enum.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @interface.type_members -> @enum.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @enum variant: [EnumDefinition]] - ... -] ...] { +]] { edge @library.members -> @enum.def } @@ -561,7 +530,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Structure definitions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@struct [StructDefinition ... @name name: [Identifier] ...] 
{ +@struct [StructDefinition @name name: [Identifier]] { node def attr (def) node_definition = @name @@ -578,16 +547,14 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i edge member -> @struct.members } -@struct [StructDefinition ... [StructMembers ... @member item: [StructMember] ...] ...] { +@struct [StructDefinition [StructMembers @member item: [StructMember]]] { node @member.lexical_scope edge @member.lexical_scope -> @struct.lexical_scope } -@struct [StructDefinition ... [StructMembers - ... - @member item: [StructMember ... @type_name [TypeName] ... @name name: [Identifier] ...] - ... -] ...] { +@struct [StructDefinition [StructMembers + @member item: [StructMember @type_name [TypeName] @name name: [Identifier]] +]] { node def attr (def) node_definition = @name @@ -606,29 +573,23 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;; NB. top-level enums (ie. those defined at the file's level) are already ;; covered above -@contract [ContractDefinition ... members: [ContractMembers - ... +@contract [ContractDefinition members: [ContractMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @contract.lexical_scope edge @contract.type_members -> @struct.def } -@interface [InterfaceDefinition ... members: [InterfaceMembers - ... +@interface [InterfaceDefinition members: [InterfaceMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] { +]] { edge @struct.lexical_scope -> @interface.lexical_scope edge @interface.type_members -> @struct.def } -@library [LibraryDefinition ... members: [LibraryMembers - ... +@library [LibraryDefinition members: [LibraryMembers item: [ContractMember @struct variant: [StructDefinition]] - ... -] ...] 
{ +]] { edge @struct.lexical_scope -> @library.lexical_scope edge @library.members -> @struct.def } @@ -645,38 +606,32 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } ;; General case for nested expressions -@expr [Expression ... variant: [_ ... @child [Expression] ...] ...] { +@expr [Expression variant: [_ @child [Expression]]] { edge @child.lexical_scope -> @expr.lexical_scope } ;; Expressions as statements -@stmt [Statement ... variant: [_ ... @expr [Expression] ...] ...] { +@stmt [Statement variant: [_ @expr [Expression]]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for variable declarations -@stmt [Statement ... variant: [VariableDeclarationStatement - ... - value: [VariableDeclarationValue ... @expr [Expression] ...] - ... -] ...] { +@stmt [Statement variant: [VariableDeclarationStatement + value: [VariableDeclarationValue @expr [Expression]] +]] { edge @expr.lexical_scope -> @stmt.lexical_scope } ;; Expressions used for state variable declarations @state_var [StateVariableDefinition - ... - value: [StateVariableDefinitionValue ... @expr [Expression]] - ... + value: [StateVariableDefinitionValue @expr [Expression]] ] { edge @expr.lexical_scope -> @state_var.lexical_scope } ;; Tuple expressions @tuple_expr [Expression [TupleExpression - ... - items: [TupleValues ... [TupleValue @expr [Expression]] ...] - ... + items: [TupleValues [TupleValue @expr [Expression]]] ]] { edge @expr.lexical_scope -> @tuple_expr.lexical_scope } @@ -684,7 +639,7 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Identifier expressions -@expr [Expression ... @name variant: [Identifier]] { +@expr [Expression @name variant: [Identifier]] { node ref attr (ref) node_reference = @name @@ -696,13 +651,10 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i ;;; Member access expressions ;; TODO: implement variant for `.address` member -@expr [Expression ... 
[MemberAccessExpression - ... +@expr [Expression [MemberAccessExpression @operand operand: [Expression] - ... @name member: [Identifier] - ... -...]] { +]] { node ref attr (ref) node_reference = @name @@ -725,14 +677,12 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [PositionalArgumentsDeclaration - ... - [PositionalArguments ... @argument [Expression] ...] - ... + [PositionalArguments @argument [Expression]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@named_arg [NamedArgument ... @name [Identifier] [Colon] @value [Expression]] { +@named_arg [NamedArgument @name [Identifier] [Colon] @value [Expression]] { node @named_arg.lexical_scope edge @value.lexical_scope -> @named_arg.lexical_scope @@ -744,24 +694,20 @@ attribute symbol_reference = symbol => type = "push_symbol", symbol = symbol, i } @args [ArgumentsDeclaration [NamedArgumentsDeclaration - ... - [NamedArgumentGroup ... [NamedArguments ... @argument [NamedArgument] ...] ...] - ... + [NamedArgumentGroup [NamedArguments @argument [NamedArgument]]] ]] { edge @argument.lexical_scope -> @args.lexical_scope } -@funcall [Expression [FunctionCallExpression ... @args [ArgumentsDeclaration]]] { +@funcall [Expression [FunctionCallExpression @args [ArgumentsDeclaration]]] { edge @args.lexical_scope -> @funcall.lexical_scope } ;;; Type expressions -@type_expr [Expression [TypeExpression ... 
@type [TypeName] ...]] { +@type_expr [Expression [TypeExpression @type [TypeName]]] { edge @type.type_ref -> @type_expr.lexical_scope } - - "#####; diff --git a/crates/solidity/outputs/cargo/tests/src/binding_rules.rs b/crates/solidity/outputs/cargo/tests/src/binding_rules.rs new file mode 100644 index 0000000000..e8bbc239ab --- /dev/null +++ b/crates/solidity/outputs/cargo/tests/src/binding_rules.rs @@ -0,0 +1,22 @@ +use std::path::PathBuf; + +use metaslang_graph_builder::ast::File; +use slang_solidity::bindings; +use slang_solidity::cst::KindTypes; + +#[test] +fn test_binding_rules_parse_successfully() { + let binding_rules = bindings::get_binding_rules(); + let graph_builder = File::::from_str(binding_rules); + + assert!( + graph_builder.is_ok(), + "Parsing binding rules failed:\n{}", + graph_builder + .err() + .map(|err| err + .display_pretty(&PathBuf::from("rules.msgb"), binding_rules) + .to_string()) + .unwrap_or_default() + ); +} diff --git a/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs b/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs index 564073e092..ed4f390516 100644 --- a/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs +++ b/crates/solidity/outputs/cargo/tests/src/bindings_assertions/assertions.rs @@ -7,7 +7,7 @@ use regex::Regex; use semver::{Version, VersionReq}; use slang_solidity::bindings::Bindings; use slang_solidity::cursor::Cursor; -use slang_solidity::query::Query; +use slang_solidity::kinds::TerminalKind; use thiserror::Error; #[derive(Debug, Error)] @@ -149,24 +149,30 @@ impl<'a> fmt::Display for DisplayCursor<'a> { /// // ^ref:2 /// // Result { +pub fn collect_assertions( + mut cursor: Cursor, + version: &Version, +) -> Result { let mut assertions = Assertions::new(); - let query = Query::parse("@comment [SingleLineComment]").unwrap(); - for result in cursor.query(vec![query]) { - let captures = result.captures; - let Some(comment) = 
captures.get("comment").and_then(|v| v.first()) else { - continue; - }; - - match find_assertion_in_comment(comment, version)? { - Some(Assertion::Definition(assertion)) => { - assertions.insert_definition_assertion(assertion)?; - } - Some(Assertion::Reference(assertion)) => { - assertions.insert_reference_assertion(assertion); + loop { + if cursor + .node() + .is_terminal_with_kind(TerminalKind::SingleLineComment) + { + match find_assertion_in_comment(&cursor, version)? { + Some(Assertion::Definition(assertion)) => { + assertions.insert_definition_assertion(assertion)?; + } + Some(Assertion::Reference(assertion)) => { + assertions.insert_reference_assertion(assertion); + } + None => (), } - None => (), + } + + if !cursor.go_to_next() { + break; } } diff --git a/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs b/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs deleted file mode 100644 index 67f1019801..0000000000 --- a/crates/solidity/outputs/cargo/tests/src/bindings_rules.rs +++ /dev/null @@ -1,10 +0,0 @@ -use metaslang_graph_builder::ast::File; -use slang_solidity::bindings; -use slang_solidity::cst::KindTypes; - -#[test] -fn test_bindings_rules_parsing() { - let graph_builder = File::::from_str(bindings::get_binding_rules()); - - assert!(graph_builder.is_ok()); -} diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs index e36df96531..3a8142ec86 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs @@ -73,7 +73,7 @@ fn query_syntax() { let query = Query::parse( &" // --8<-- [start:query-syntax-4] - [MultiplicativeExpression left_operand:[_] [_] ...] 
+ [MultiplicativeExpression left_operand:[_] [_]] // --8<-- [end:query-syntax-4] " .remove_mkdoc_snippet_markers(), @@ -84,7 +84,7 @@ fn query_syntax() { let query = Query::parse( &" // --8<-- [start:query-syntax-5] - [MultiplicativeExpression ... [Expression [StringExpression]] ...] + [MultiplicativeExpression [Expression [StringExpression]]] // --8<-- [end:query-syntax-5] " .remove_mkdoc_snippet_markers(), @@ -107,7 +107,7 @@ fn capturing_nodes() { let query = Query::parse( &" // --8<-- [start:capturing-nodes-1] - [StructDefinition ... @struct_name name:[Identifier] ...] + [StructDefinition @struct_name name:[Identifier]] // --8<-- [end:capturing-nodes-1] " .remove_mkdoc_snippet_markers(), @@ -120,21 +120,12 @@ fn capturing_nodes() { &" // --8<-- [start:capturing-nodes-2] [ContractDefinition - ... @contract_name name:[Identifier] - ... members:[ContractMembers - ... [ContractMember - [EventDefinition - ... - @event_name name:[Identifier] - ... - ] + [EventDefinition @event_name name:[Identifier]] ] - ... ] - ... ] // --8<-- [end:capturing-nodes-2] " @@ -154,7 +145,7 @@ fn quantification() { let query = Query::parse( &" // --8<-- [start:quantification-1] - [SourceUnit ... (leading_trivia:[_])+] + [SourceUnit members:[_ ([_ @import [ImportDirective]])+]] // --8<-- [end:quantification-1] " .remove_mkdoc_snippet_markers(), @@ -164,18 +155,15 @@ fn quantification() { assert_matches( &query, NonterminalKind::SourceUnit, - "// comment 1\n// comment 2\n/* comment 3 */", + "import 'test.sol';\nimport * as Utils from 'lib/utils.sol'\n\ncontract Test {}", ); let query = Query::parse( &" // --8<-- [start:quantification-2] - [ContractDefinition - ... - (@docline [SingleLineNatSpecComment])+ - ... + [StructDefinition @name name:[_] - ... 
+ members:[_ ([_ @member [Identifier]])+] ] // --8<-- [end:quantification-2] " @@ -187,8 +175,10 @@ fn quantification() { &query, NonterminalKind::SourceUnit, " - /// A doc comment - contract A {} + struct Test { + int x; + int y; + } ", ); @@ -196,20 +186,13 @@ fn quantification() { &" // --8<-- [start:quantification-3] [FunctionCallExpression - ... arguments:[ArgumentsDeclaration variant:[PositionalArgumentsDeclaration - ... arguments:[PositionalArguments - ... (@arg [Expression variant:[StringExpression]])? - ... ] - ... ] - ... ] - ... ] // --8<-- [end:quantification-3] " @@ -227,7 +210,7 @@ fn quantification() { let matches: Vec<_> = iter.collect(); - matches[3].captures.get("arg").unwrap(); + matches[0].captures.get("arg").unwrap(); } #[test] @@ -240,7 +223,6 @@ fn alternations() { (@function variant:[Identifier] | @method variant:[MemberAccessExpression]) ] - ... ] // --8<-- [end:alternations-1] " @@ -292,3 +274,103 @@ fn alternations() { "break" ); } + +#[test] +fn adjacency() { + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-1] + [FunctionDefinition + [ParametersDeclaration + [Parameters . @first_param [Parameter]] + ] + ] + // --8<-- [end:adjacency-1] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("first_param").unwrap()[0] + .node() + .unparse(), + "int x" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-2] + [FunctionDefinition + [ParametersDeclaration + [Parameters @last_param [Parameter] .] 
+ ] + ] + // --8<-- [end:adjacency-2] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches( + &query, + NonterminalKind::FunctionDefinition, + "function test(int x, int y);", + ); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 1); + assert_eq!( + matches[0].captures.get("last_param").unwrap()[0] + .node() + .unparse(), + " int y" + ); + + let query = Query::parse( + &r#" + // --8<-- [start:adjacency-3] + [Statements @stmt1 [Statement] . @stmt2 [Statement]] + // --8<-- [end:adjacency-3] + "# + .remove_mkdoc_snippet_markers(), + ) + .unwrap(); + + let iter = assert_matches(&query, NonterminalKind::Statements, "int x; int y; x + y;"); + + let matches: Vec<_> = iter.collect(); + assert_eq!(matches.len(), 2); + assert_eq!( + matches[0].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + "int x;" + ); + assert_eq!( + matches[0].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt1").unwrap()[0] + .node() + .unparse(), + " int y;" + ); + assert_eq!( + matches[1].captures.get("stmt2").unwrap()[0] + .node() + .unparse(), + " x + y;" + ); +} diff --git a/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs b/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs index 2f1439dea5..6447cb8fc3 100644 --- a/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs +++ b/crates/solidity/outputs/cargo/tests/src/doc_examples/using_queries.rs @@ -64,8 +64,8 @@ fn using_queries() -> Result<()> { // --8<-- [start:multiple-patterns] let mut names = vec![]; - let struct_def = Query::parse("[StructDefinition ... @name [Identifier] ...]").unwrap(); - let enum_def = Query::parse("[EnumDefinition ... 
@name [Identifier] ...]").unwrap(); + let struct_def = Query::parse("[StructDefinition @name [Identifier]]").unwrap(); + let enum_def = Query::parse("[EnumDefinition @name [Identifier]]").unwrap(); for r#match in cursor.query(vec![struct_def, enum_def]) { let index = r#match.query_number; @@ -96,7 +96,7 @@ fn using_queries() -> Result<()> { let mut names = vec![]; - let query = Query::parse("[TypedTupleMember ... @type type_name:[_] ...]").unwrap(); + let query = Query::parse("[TypedTupleMember @type type_name:[_]]").unwrap(); for r#match in cursor.query(vec![query]) { let captures = r#match.captures; @@ -144,15 +144,8 @@ fn tx_origin_query() -> Result<()> { // --8<-- [start:tx-origin] let query = Query::parse( r#"@txorigin [MemberAccessExpression - ... - [Expression - ... - @start ["tx"] - ... - ] - ... + [Expression @start ["tx"]] ["origin"] - ... ]"#, ) .unwrap(); diff --git a/crates/solidity/outputs/cargo/tests/src/lib.rs b/crates/solidity/outputs/cargo/tests/src/lib.rs index 17fdb1a6c0..d0dbbc5f2e 100644 --- a/crates/solidity/outputs/cargo/tests/src/lib.rs +++ b/crates/solidity/outputs/cargo/tests/src/lib.rs @@ -2,9 +2,9 @@ use metaslang_bindings as _; +mod binding_rules; mod bindings_assertions; mod bindings_output; -mod bindings_rules; mod cst_output; mod doc_examples; mod generated; diff --git a/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts b/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts index b3e532af6c..da826ca4c5 100644 --- a/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts +++ b/crates/solidity/outputs/npm/tests/src/doc-examples/using-queries.ts @@ -58,8 +58,8 @@ test("using queries", async () => { // --8<-- [start:multiple-patterns] const names = []; - const struct_def = Query.parse("[StructDefinition ... @name [Identifier] ...]"); - const enum_def = Query.parse("[EnumDefinition ... 
@name [Identifier] ...]"); + const struct_def = Query.parse("[StructDefinition @name [Identifier]]"); + const enum_def = Query.parse("[EnumDefinition @name [Identifier]]"); const matches = cursor.query([struct_def, enum_def]); let match = null; @@ -89,7 +89,7 @@ test("using queries", async () => { const names = []; - const query = Query.parse("[TypedTupleMember ... @type type_name:[_] ...]"); + const query = Query.parse("[TypedTupleMember @type type_name:[_]]"); const matches = cursor.query([query]); let match = null; @@ -137,15 +137,8 @@ test("using queries", async () => { // --8<-- [start:tx-origin] const query = Query.parse(` @txorigin [MemberAccessExpression - ... - [Expression - ... - @start ["tx"] - ... - ] - ... + [Expression @start ["tx"]] ["origin"] - ... ]`); const matches = cursor.query([query]); diff --git a/crates/testlang/outputs/cargo/tests/src/graph/mod.rs b/crates/testlang/outputs/cargo/tests/src/graph/mod.rs index a8eaa45605..8b5f7b171a 100644 --- a/crates/testlang/outputs/cargo/tests/src/graph/mod.rs +++ b/crates/testlang/outputs/cargo/tests/src/graph/mod.rs @@ -20,11 +20,11 @@ fn builds_a_graph() { node @tree_node.def } - @tree [Tree ... @root node: [TreeNode] ...] { + @tree [Tree @root node: [TreeNode]] { edge @root.def -> @tree.def } - @parent [TreeNode ... members: [_ ... [_ @child variant: [TreeNode]] ...] ...] 
{ + @parent [TreeNode members: [_ [_ @child variant: [TreeNode]]]] { edge @child.def -> @parent.def } "; diff --git a/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs b/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs index 0c6c464075..53568507c6 100644 --- a/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs +++ b/crates/testlang/outputs/cargo/tests/src/query/engine_tests.rs @@ -115,11 +115,32 @@ fn common_test_tree() -> Edge { ) } +fn common_test_tree_with_trivia() -> Edge { + cst_tree!( + TreeNode [ + Node: DelimitedIdentifier "A", + Whitespace " ", + DelimitedIdentifier "B", + Whitespace " ", + EndOfLine "\n", + DelimitedIdentifier "C", + TreeNodeChild [ + Whitespace " ", + DelimitedIdentifier "D", + EndOfLine "\n", + Whitespace " ", + Node: DelimitedIdentifier "E", + Whitespace " ", + ], + ] + ) +} + #[test] fn test_spread() { run_query_test( &common_test_tree(), - "[TreeNode ... @x1 [DelimitedIdentifier] ... @x2 [DelimitedIdentifier] ...]", + "[TreeNode @x1 [DelimitedIdentifier] @x2 [DelimitedIdentifier]]", query_matches! { {x1: ["A"], x2: ["B"]} {x1: ["A"], x2: ["C"]} @@ -132,7 +153,7 @@ fn test_spread() { fn test_adjacent() { run_query_test( &common_test_tree(), - "[TreeNode ... @y1 [DelimitedIdentifier] @y2 [DelimitedIdentifier] ...]", + "[TreeNode @y1 [DelimitedIdentifier] . @y2 [DelimitedIdentifier]]", query_matches! { {y1: ["A"], y2: ["B"]} {y1: ["B"], y2: ["C"]} @@ -140,11 +161,35 @@ fn test_adjacent() { ); } +#[test] +fn test_adjacency_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNode @y1 [DelimitedIdentifier] . @y2 [DelimitedIdentifier]]", + query_matches! { + {y1: ["A"], y2: ["B"]} + {y1: ["B"], y2: ["C"]} + }, + ); +} + +#[test] +fn test_anonymous_node_matcher_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild @x [_]]", + query_matches! 
{ + {x: ["D"]} + {x: ["E"]} + }, + ); +} + #[test] fn test_child() { run_query_test( &common_test_tree(), - "[TreeNodeChild ... @x [DelimitedIdentifier] ...]", + "[TreeNodeChild @x [DelimitedIdentifier]]", query_matches! { {x: ["D"]} {x: ["E"]} @@ -156,7 +201,7 @@ fn test_child() { fn test_parent_and_child() { run_query_test( &common_test_tree(), - "[TreeNode ... @p node:[_] ... [TreeNodeChild ... @c [DelimitedIdentifier] ...]]", + "[TreeNode @p node:[_] [TreeNodeChild @c [DelimitedIdentifier]]]", query_matches! { {c: ["D"], p: ["A"]} {c: ["E"], p: ["A"]} @@ -168,7 +213,7 @@ fn test_parent_and_child() { fn test_named() { run_query_test( &common_test_tree(), - "[TreeNode ... @x node:[DelimitedIdentifier] ...]", + "[TreeNode @x node:[DelimitedIdentifier]]", query_matches! { {x: ["A"]} }, @@ -179,7 +224,7 @@ fn test_named() { fn test_multilevel_adjacent() { run_query_test( &common_test_tree(), - "[_ ... @x [DelimitedIdentifier] @y [DelimitedIdentifier] ...]", + "[_ @x [DelimitedIdentifier] . @y [DelimitedIdentifier]]", query_matches! { {x: ["A"], y: ["B"]} {x: ["B"], y: ["C"]} @@ -192,7 +237,7 @@ fn test_multilevel_adjacent() { fn test_multilevel_named() { run_query_test( &common_test_tree(), - "[_ ... @x node:[_] ...]", + "[_ @x node:[_]]", query_matches! { {x: ["A"]} {x: ["E"]} @@ -204,7 +249,7 @@ fn test_multilevel_named() { fn test_text_value() { run_query_test( &common_test_tree(), - r#"[TreeNode ... @z1 [DelimitedIdentifier] ["B"] @z2 [DelimitedIdentifier] ...]"#, + r#"[TreeNode @z1 [DelimitedIdentifier] . ["B"] . @z2 [DelimitedIdentifier]]"#, query_matches! { {z1: ["A"], z2: ["C"]} }, @@ -215,7 +260,7 @@ fn test_text_value() { fn test_one_or_more() { run_query_test( &common_test_tree(), - "[TreeNode ... (@x [DelimitedIdentifier])+ [_] ]", + "[TreeNode (@x [DelimitedIdentifier])+ . [_] .]", query_matches! 
{ {x: ["A", "B", "C"]} {x: ["B", "C"]} @@ -228,7 +273,7 @@ fn test_one_or_more() { fn test_zero_or_more() { run_query_test( &common_test_tree(), - "[TreeNode ... (@y [DelimitedIdentifier])* [_] ]", + "[TreeNode (@y [DelimitedIdentifier])* . [_] .]", query_matches! { {y: ["A", "B", "C"]} {y: ["B", "C"]} @@ -242,7 +287,7 @@ fn test_zero_or_more() { fn test_optional() { run_query_test( &common_test_tree(), - "[TreeNode ... (@z [DelimitedIdentifier])? [_] ]", + "[TreeNode (@z [DelimitedIdentifier])? . [_] .]", query_matches! { {z: ["C"]} {} @@ -254,9 +299,207 @@ fn test_optional() { fn test_nested() { run_query_test( &common_test_tree(), - "@root [TreeNode ... @z [DelimitedIdentifier] [_] ]", + "@root [TreeNode @z [DelimitedIdentifier] . [_] .]", query_matches! { {root: ["ABCDE"], z: ["C"]} }, ); } + +#[test] +fn test_alternatives() { + run_query_test( + &common_test_tree(), + "(@x node:[_] | @y [DelimitedIdentifier] . @z [DelimitedIdentifier])", + query_matches! { + {x: ["A"]} + {y: ["A"], z: ["B"]} + {y: ["B"], z: ["C"]} + {y: ["D"], z: ["E"]} + {x: ["E"]} + }, + ); +} + +#[test] +fn test_adjacency_at_beginning_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild . @x [DelimitedIdentifier]]", + query_matches! { + {x: ["D"]} + }, + ); +} + +#[test] +fn test_adjacency_at_end_skips_trivia() { + run_query_test( + &common_test_tree_with_trivia(), + "[TreeNodeChild @x [DelimitedIdentifier] .]", + query_matches! { + {x: ["E"]} + }, + ); +} + +fn flat_tree() -> Edge { + cst_tree!( + TreeNode [ + Node: DelimitedIdentifier "A", + Whitespace " ", + DelimitedIdentifier "B", + DelimitedIdentifier "C", + DelimitedIdentifier "D", + ] + ) +} + +#[test] +fn test_ellipsis_followed_by_optional_grouping() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] (@y [DelimitedIdentifier] . @z [DelimitedIdentifier])?]", + query_matches! 
{ + {x: ["A"], y: ["B"], z: ["C"]} + {x: ["A"], y: ["C"], z: ["D"]} + {x: ["A"]} + {x: ["B"], y: ["C"], z: ["D"]} + {x: ["B"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_adjacency_followed_by_optional_grouping() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] . (@y [DelimitedIdentifier] . @z [DelimitedIdentifier])?]", + query_matches! { + {x: ["A"]} + {x: ["A"], y: ["B"], z: ["C"]} + {x: ["B"]} + {x: ["B"], y: ["C"], z: ["D"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_non_captured_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [DelimitedIdentifier]]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_anonymous_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [_]]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_non_captured_optional_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] [DelimitedIdentifier]?]", + query_matches! { + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["A"]} + {x: ["B"]} + {x: ["B"]} + {x: ["B"]} + {x: ["C"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +#[test] +fn test_captures_followed_by_captured_optional_matchers() { + run_query_test( + &flat_tree(), + "[TreeNode @x [DelimitedIdentifier] @y [DelimitedIdentifier]?]", + query_matches! 
{ + {x: ["A"], y: ["B"]} + {x: ["A"], y: ["C"]} + {x: ["A"], y: ["D"]} + {x: ["A"]} + {x: ["B"], y: ["C"]} + {x: ["B"], y: ["D"]} + {x: ["B"]} + {x: ["C"], y: ["D"]} + {x: ["C"]} + {x: ["D"]} + }, + ); +} + +fn sample_deep_tree() -> Edge { + cst_tree!( + Tree [ + Keyword: TreeKeyword "tree", + Name: Identifier "$t1", + Node: TreeNode [ + OpenBracket "[", + Members: TreeNodeChildren [ + TreeNodeChild [ + Variant: DelimitedIdentifier "A", + ], + TreeNodeChild [ + Variant: TreeNode [ + OpenBracket "[", + Members: TreeNodeChildren [ + TreeNodeChild [ + Variant: DelimitedIdentifier "B" + ], + TreeNodeChild [ + Variant: DelimitedIdentifier "C" + ], + ], + CloseBracket "]", + ] + ] + ], + CloseBracket "]", + ], + Semicolon: Semicolon ";" + ] + ) +} + +#[test] +fn test_deeply_nested_matchers() { + run_query_test( + &sample_deep_tree(), + "@parent [TreeNode members: [TreeNodeChildren [TreeNodeChild @child variant: [TreeNode]]]]", + query_matches! { + {parent: ["[A[BC]]"], child: ["[BC]"]} + }, + ); +} diff --git a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs index 796485930a..3ed6dc0cc0 100644 --- a/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs +++ b/crates/testlang/outputs/cargo/tests/src/query/parser_tests.rs @@ -16,8 +16,8 @@ fn test_text_escaping() { } #[test] -fn test_ellipsis() { - run_parser_test(r#"[_ ...]"#, r#"[_ ...]"#); +fn test_adjacency() { + run_parser_test(r#"[_ . [_]]"#, r#"[_ . 
[_]]"#); } #[test] @@ -52,15 +52,12 @@ fn test_zero_or_more_canonicalisation() { // Test the error message on parse failure #[test] fn test_parsing_error() { - let result = Query::parse(r#"@root [_ ..."#); + let result = Query::parse(r#"@root [_"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { - assert_eq!( - e.message, - "Parse error:\nexpected ']' at: \nAlt at: [_ ...\nAlt at: @root [_ ...\n" - ); - assert_eq!((e.row, e.column), (0, 12)); + assert_eq!(e.message, "Parse error:\nexpected ']' at: \nAlt at: [_\n"); + assert_eq!((e.row, e.column), (0, 8)); } } } @@ -68,52 +65,96 @@ fn test_parsing_error() { // See https://github.com/NomicFoundation/slang/issues/1042 #[test] fn test_parsing_error_with_invalid_edge_label() { - let result = Query::parse( - r#" -[Tree - ... - @name Name: [_] - ... -]"#, - ); + let result = Query::parse(r#"[Tree @name Name: [_]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'Name' is not a valid edge label at: Name: [_]\n ...\n]\n", + "Parse error:\n'Name' is not a valid edge label at: Name: [_]]\n", ); - assert_eq!((e.row, e.column), (3, 10)); + assert_eq!((e.row, e.column), (0, 12)); } } } #[test] fn test_parsing_error_with_invalid_node_kind() { - let result = Query::parse(r#"[Tree ... [tree_node] ...]"#); + let result = Query::parse(r#"[Tree [tree_node]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'tree_node' is not a valid node kind at: tree_node] ...]\n", + "Parse error:\n'tree_node' is not a valid node kind at: tree_node]]\n", ); - assert_eq!((e.row, e.column), (0, 11)); + assert_eq!((e.row, e.column), (0, 7)); } } } #[test] fn test_parsing_error_with_kind_beginning_with_underscore() { - let result = Query::parse(r#"[Tree ... 
[_tree_node] ...]"#); + let result = Query::parse(r#"[Tree [_tree_node]]"#); match result { Ok(_) => panic!("Expected error"), Err(e) => { assert_eq!( e.message, - "Parse error:\n'_tree_node' is not a valid node kind at: _tree_node] ...]\n", + "Parse error:\n'_tree_node' is not a valid node kind at: _tree_node]]\n", ); - assert_eq!((e.row, e.column), (0, 11)); + assert_eq!((e.row, e.column), (0, 7)); } } } + +#[test] +fn test_fails_parsing_ellipsis() { + let result = Query::parse(r#"[_ ...]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nThe ellipsis `...` operator is deprecated, and replaced with a new adjacency `.` operator. For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/ at: ...]\n", + ), + } +} + +#[test] +fn test_fails_consecutive_adjacency_operators() { + let result = Query::parse(r#"[_ [DelimitedIdentifier] . .]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!(e.message, "Parse error:\nNoneOf at: .]\n"), + } +} + +#[test] +fn test_fails_sole_adjacency() { + let result = Query::parse(r#"[_ .]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nexpected ']' at: .]\nAlt at: [_ .]\n" + ), + } +} + +#[test] +fn test_fails_adjacency_at_edge_of_alt_option() { + let result = Query::parse(r#"([TreeNode] | . [DelimitedIdentifier])+"#); + assert!(result.is_err(), "Expected parse failure"); +} + +#[test] +fn test_fails_parsing_trivia_node_selector() { + let result = Query::parse(r#"[EndOfLine]"#); + match result { + Ok(_) => panic!("Expected parse failure"), + Err(e) => assert_eq!( + e.message, + "Parse error:\nMatching trivia nodes directly is forbidden. 
For more information, check the Tree Query Language guide: https://nomicfoundation.github.io/slang/user-guide/tree-query-language/ at: EndOfLine]\n" + ), + } +} diff --git a/crates/testlang/outputs/npm/tests/src/tests/query.ts b/crates/testlang/outputs/npm/tests/src/tests/query.ts index 2405e6bacb..4f328e3215 100644 --- a/crates/testlang/outputs/npm/tests/src/tests/query.ts +++ b/crates/testlang/outputs/npm/tests/src/tests/query.ts @@ -8,7 +8,7 @@ test("simple query", () => { const tree_source = `tree [A [B C] D];`; const parse_output = language.parse(NonterminalKind.Tree, tree_source); - const query_source = `[TreeNodeChild ... @id [DelimitedIdentifier]]`; + const query_source = `[TreeNodeChild @id [DelimitedIdentifier]]`; const query = Query.parse(query_source); const matches = parse_output.createTreeCursor().query([query]); diff --git a/documentation/public/user-guide/tree-query-language.md b/documentation/public/user-guide/tree-query-language.md index bfca807e70..3b682e4e17 100644 --- a/documentation/public/user-guide/tree-query-language.md +++ b/documentation/public/user-guide/tree-query-language.md @@ -6,8 +6,8 @@ A _query_ is a pattern that matches a certain set of nodes in a tree. The expression to match a given node consists of a pair of brackets (`[]`) containing two things: the node's kind, and optionally, a series of other patterns that match the node's children. 
For -example, this pattern would match any `MultiplicativeExpression` node whose children -are exactly two `Expression` nodes, with an `Asterisk` node in between (no whitespace): +example, this pattern would match any `MultiplicativeExpression` node that has +two child `Expression` nodes, with an `Asterisk` node in between: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-1" @@ -36,7 +36,7 @@ node with two children, one of any kind labeled `left_operand` and one of any ki --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-4" ``` -Children can also be elided. For example, this would produce multiple matches for a +Children can be elided. For example, this would produce multiple matches for a `MultiplicativeExpression` where at least _one_ of the children is an expression of a `StringExpression` variant, where each match is associated with each of the `StringExpression` children: @@ -44,6 +44,10 @@ is associated with each of the `StringExpression` children: --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:query-syntax-5" ``` +Trivia nodes (whitespace, comments, etc.) will be skipped over when running a +query. Furthermore, trivia nodes cannot be explicitly (or implicitly with `_`) +matched by queries. + ### Capturing Nodes When matching patterns, you may want to process specific nodes within the @@ -72,13 +76,13 @@ by a `?`, `*` or `+` operator. The `?` operator matches _zero or one_ repetition of a pattern, the `*` operator matches _zero or more_, and the `+` operator matches _one or more_.
-For example, this pattern would match a sequence of one or more comments at the top of the file: +For example, this pattern would match a sequence of one or more import directives at the top of the file: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:quantification-1" ``` -This pattern would match a contract definition with at least one doc comment, capturing them: +This pattern would match a structure definition with one or more members, capturing their names: ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:quantification-2" @@ -93,7 +97,7 @@ present: ### Alternations -An alternation is written as a sequence of patterns separated by '|' and surrounded by parentheses. +An alternation is written as a sequence of patterns separated by `|` and surrounded by parentheses. For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: @@ -107,3 +111,29 @@ This pattern would match a set of possible keyword terminals, capturing them as ```{ .scheme } --8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:alternations-2" ``` + +### Adjacency + +By using the adjacency operator `.` you can constrain a pattern to only match +the first or the last child nodes. + +For example, the following pattern would match only the first parameter +declaration in a function definition: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-1" +``` + +And conversely the following will match only the last parameter: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-2" +``` + +If the adjacency operator is used in between two patterns it constrains matches +on both patterns to occur consecutively, i.e.
without any other sibling node in +between. For example, this pattern matches pairs of consecutive statements: + +```{ .scheme } +--8<-- "crates/solidity/outputs/cargo/tests/src/doc_examples/tree_query_language.rs:adjacency-3" +```