Skip to content

Commit

Permalink
fix trailing IdentifierPart in grammar (#641)
Browse files Browse the repository at this point in the history
- Changed numeric literals to use `notFollowedBy: IdentifierStart` instead
of `notFollowedBy: IdentifierPart`.
- Removed `notFollowedBy: IdentifierPart` from string literals, to match
the behavior of `solc`.
  • Loading branch information
OmarTawfik authored Nov 6, 2023
1 parent 36be89c commit 52f5efb
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 113 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
- oneOrMore:
reference: "HexCharacter"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"
0.5.0:
# removed uppercase "0X"
trailingContext:
Expand All @@ -52,7 +52,7 @@
- oneOrMore:
reference: "HexCharacter"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"

- name: "DecimalLiteral"
kind: "Scanner"
Expand All @@ -75,7 +75,7 @@
- optional:
reference: "DecimalExponent"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"
0.5.0:
# Second "DecimalDigits" is no longer "optional"
trailingContext:
Expand All @@ -94,7 +94,7 @@
- optional:
reference: "DecimalExponent"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"

- name: "DecimalDigits"
kind: "Scanner"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,9 @@
- name: "HexStringLiteral"
kind: "Scanner"
unversioned:
trailingContext:
scanner:
choice:
- reference: "SingleQuotedHexStringLiteral"
- reference: "DoubleQuotedHexStringLiteral"
notFollowedBy:
reference: "IdentifierPart"
choice:
- reference: "SingleQuotedHexStringLiteral"
- reference: "DoubleQuotedHexStringLiteral"

- name: "SingleQuotedHexStringLiteral"
kind: "Scanner"
Expand Down Expand Up @@ -92,13 +88,9 @@
- name: "AsciiStringLiteral"
kind: "Scanner"
unversioned:
trailingContext:
scanner:
choice:
- reference: "SingleQuotedAsciiStringLiteral"
- reference: "DoubleQuotedAsciiStringLiteral"
notFollowedBy:
reference: "IdentifierPart"
choice:
- reference: "SingleQuotedAsciiStringLiteral"
- reference: "DoubleQuotedAsciiStringLiteral"

- name: "SingleQuotedAsciiStringLiteral"
kind: "Scanner"
Expand Down Expand Up @@ -151,13 +143,9 @@
kind: "Scanner"
versioned:
0.7.0:
trailingContext:
scanner:
choice:
- reference: "SingleQuotedUnicodeStringLiteral"
- reference: "DoubleQuotedUnicodeStringLiteral"
notFollowedBy:
reference: "IdentifierPart"
choice:
- reference: "SingleQuotedUnicodeStringLiteral"
- reference: "DoubleQuotedUnicodeStringLiteral"

- name: "SingleQuotedUnicodeStringLiteral"
kind: "Scanner"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
- oneOrMore:
reference: "HexCharacter"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"

- name: "YulDecimalLiteral"
kind: "Scanner"
Expand All @@ -153,4 +153,4 @@
from: "0"
to: "9"
notFollowedBy:
reference: "IdentifierPart"
reference: "IdentifierStart"
49 changes: 20 additions & 29 deletions crates/solidity/inputs/language/src/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3641,7 +3641,7 @@ codegen_language_macros::compile!(Language(
OneOrMore(Fragment(HexCharacter))
]))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
),
// Uppercase "0X" only enabled before "0.5.0":
Expand All @@ -3656,7 +3656,7 @@ codegen_language_macros::compile!(Language(
OneOrMore(Fragment(HexCharacter))
]))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
)
]
Expand All @@ -3671,7 +3671,7 @@ codegen_language_macros::compile!(Language(
Fragment(DecimalDigits),
Optional(Fragment(DecimalExponent))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
),
// An integer and a dot (without a fraction) is disabled in "0.5.0"
Expand All @@ -3683,7 +3683,7 @@ codegen_language_macros::compile!(Language(
Atom("."),
Optional(Fragment(DecimalExponent))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
),
// A dot and a fraction (without an integer) is enabled in all versions:
Expand All @@ -3694,7 +3694,7 @@ codegen_language_macros::compile!(Language(
Fragment(DecimalDigits),
Optional(Fragment(DecimalExponent))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
),
// An integer, a dot, and a fraction is enabled in all versions:
Expand All @@ -3706,7 +3706,7 @@ codegen_language_macros::compile!(Language(
Fragment(DecimalDigits),
Optional(Fragment(DecimalExponent))
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
)
]
Expand Down Expand Up @@ -3787,13 +3787,10 @@ codegen_language_macros::compile!(Language(
Token(
name = HexStringLiteral,
definitions = [TokenDefinition(
scanner = TrailingContext(
scanner = Choice([
Fragment(SingleQuotedHexString),
Fragment(DoubleQuotedHexString)
]),
not_followed_by = Fragment(IdentifierPart)
)
scanner = Choice([
Fragment(SingleQuotedHexString),
Fragment(DoubleQuotedHexString)
])
)]
),
Fragment(
Expand Down Expand Up @@ -3836,13 +3833,10 @@ codegen_language_macros::compile!(Language(
Token(
name = AsciiStringLiteral,
definitions = [TokenDefinition(
scanner = TrailingContext(
scanner = Choice([
Fragment(SingleQuotedAsciiString),
Fragment(DoubleQuotedAsciiString)
]),
not_followed_by = Fragment(IdentifierPart)
)
scanner = Choice([
Fragment(SingleQuotedAsciiString),
Fragment(DoubleQuotedAsciiString)
])
)]
),
Fragment(
Expand Down Expand Up @@ -3880,13 +3874,10 @@ codegen_language_macros::compile!(Language(
name = UnicodeStringLiteral,
definitions = [TokenDefinition(
enabled = From("0.7.0"),
scanner = TrailingContext(
scanner = Choice([
Fragment(SingleQuotedUnicodeString),
Fragment(DoubleQuotedUnicodeString)
]),
not_followed_by = Fragment(IdentifierPart)
)
scanner = Choice([
Fragment(SingleQuotedUnicodeString),
Fragment(DoubleQuotedUnicodeString)
])
)]
),
Fragment(
Expand Down Expand Up @@ -4268,7 +4259,7 @@ codegen_language_macros::compile!(Language(
))
])
]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
)]
),
Expand All @@ -4278,7 +4269,7 @@ codegen_language_macros::compile!(Language(
scanner = TrailingContext(
scanner =
Sequence([Atom("0x"), OneOrMore(Fragment(HexCharacter))]),
not_followed_by = Fragment(IdentifierPart)
not_followed_by = Fragment(IdentifierStart)
)
)]
)
Expand Down
15 changes: 3 additions & 12 deletions crates/solidity/inputs/language/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,30 +796,21 @@ slang_grammar! {

// Ascii String Literals

scanner AsciiStringLiteral = (
(SingleQuotedAsciiStringLiteral | DoubleQuotedAsciiStringLiteral)
not followed by IdentifierStart
) ;
scanner AsciiStringLiteral = (SingleQuotedAsciiStringLiteral | DoubleQuotedAsciiStringLiteral) ;
scanner DoubleQuotedAsciiStringLiteral = ("\"" ((EscapeSequence | AsciiCharacterWithoutDoubleQuoteOrBackslash) *) "\"") ;
scanner SingleQuotedAsciiStringLiteral = ("\'" ((EscapeSequence | AsciiCharacterWithoutSingleQuoteOrBackslash) *) "\'") ;

// Hex String Literals

scanner HexStringLiteral = (
(SingleQuotedHexStringLiteral | DoubleQuotedHexStringLiteral)
not followed by IdentifierStart
) ;
scanner HexStringLiteral = (SingleQuotedHexStringLiteral | DoubleQuotedHexStringLiteral) ;
scanner DoubleQuotedHexStringLiteral = ("hex\"" (HexStringContents ?) "\"") ;
scanner SingleQuotedHexStringLiteral = ("hex\'" (HexStringContents ?) "\'") ;
scanner HexStringContents = (HexCharacter HexCharacter ((('_' ?) HexCharacter HexCharacter) *)) ;

// Unicode String Literals

scanner UnicodeStringLiteral = {
introduced in "0.7.0" (
(SingleQuotedUnicodeStringLiteral | DoubleQuotedUnicodeStringLiteral)
not followed by IdentifierStart
)
introduced in "0.7.0" (SingleQuotedUnicodeStringLiteral | DoubleQuotedUnicodeStringLiteral)
} ;
scanner DoubleQuotedUnicodeStringLiteral = { introduced in "0.7.0" ("unicode\"" ((EscapeSequence | (! "\n\r\"\\")) *) "\"") } ;
scanner SingleQuotedUnicodeStringLiteral = { introduced in "0.7.0" ("unicode\'" ((EscapeSequence | (! "\n\r\'\\")) *) "\'") } ;
Expand Down
30 changes: 9 additions & 21 deletions crates/solidity/outputs/cargo/crate/src/generated/language.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions crates/solidity/outputs/cargo/tests/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ fn test_next_token() {
("1", DecimalLiteral),
("\n", EndOfLine),
("unicode'abc'", UnicodeStringLiteral),
("unicode'abc'ZZ", Identifier), // TODO: This needs to be further checked against solc
("hex'abcd'", HexStringLiteral),
("hex'abcd'ZZz", HexKeyword), // TODO: This needs to be further checked against solc
("'abc'ZZ", AsciiStringLiteral), // with an identifier afterwards
("unicode'abc'ZZ", UnicodeStringLiteral), // with an identifier afterwards
("hex'abcd'ZZz", HexStringLiteral), // with an identifier afterwards
("// single line\n", SingleLineComment),
("/* multi-line\n comment */ blah", MultilineComment),
("/* multi-line comment **/ blah", MultilineComment),
("0ZZ", SKIPPED),
("0xabZZ", SKIPPED),
("'abc'ZZ", SKIPPED),
] {
assert_eq!(language.scan(LexicalContext::Default, s), Some(*k));
}
Expand Down
30 changes: 9 additions & 21 deletions crates/solidity/outputs/npm/crate/src/generated/language.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 52f5efb

Please sign in to comment.