diff --git a/src/main/php/lang/ast/Tokens.class.php b/src/main/php/lang/ast/Tokens.class.php index bc4303a..c8b278f 100755 --- a/src/main/php/lang/ast/Tokens.class.php +++ b/src/main/php/lang/ast/Tokens.class.php @@ -12,7 +12,7 @@ class Tokens { const DELIMITERS = " \r\n\t'\$\"`=,;.:?!(){}[]#+-*/|&^@%~<>"; const OPERATORS = [ - '<' => ['<=>', '<<=', '<=', '<<', '<>', ' ['<=>', '<<=', '<<<', '<=', '<<', '<>', '' => ['>>=', '>=', '>>'], '=' => ['===', '=>', '=='], '!' => ['!==', '!='], @@ -22,7 +22,7 @@ class Tokens { '+' => ['+=', '++'], '-' => ['-=', '--', '->'], '*' => ['**=', '*=', '**'], - '/' => ['/='], + '/' => ['/=', '//', '/*'], '~' => ['~='], '%' => ['%='], '?' => ['?->', '??=', '?:', '??'], @@ -106,10 +106,10 @@ public function iterator($language) { $end= '\\'.$token; do { $chunk= $next($end); - if (null === $chunk) { - throw new FormatException('Unclosed string literal starting at line '.$line); - } else if ('\\' === $chunk) { + if ('\\' === $chunk) { $string.= $chunk.$next($end); + } else if (null === $chunk) { + throw new FormatException('Unclosed string literal starting at line '.$line); } else { $string.= $chunk; } @@ -162,34 +162,17 @@ public function iterator($language) { goto number; } $offset-= strlen($t); - } else if ('/' === $token) { - $t= $next(self::DELIMITERS); - if ('/' === $t) { - yield new Token(null, 'comment', '//'.$next("\r\n"), $line); - continue; - } else if ('*' === $t) { - $comment= ''; - do { - $chunk= $next('/'); - $comment.= $chunk; - } while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]); - $comment.= $next('/'); - yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line); - $line+= substr_count($comment, "\n"); - continue; - } - null === $t || $offset-= strlen($t); } // Handle combined operators. First, ensure we have enough bytes in our buffer // Our longest operator is 3 characters, hardcode this here. - if (self::OPERATORS[$token]) { + if ($combined= self::OPERATORS[$token]) { $offset--; while ($offset + 3 > $length && $this->in->available()) { $buffer.= $this->in->read(8192); $length= strlen($buffer); } - foreach (self::OPERATORS[$token] as $operator) { + foreach ($combined as $operator) { if ($offset + strlen($operator) > $length) continue; if (0 === substr_compare($buffer, $operator, $offset, strlen($operator))) { $token= $operator; @@ -197,8 +180,41 @@ public function iterator($language) { } } $offset+= strlen($token); - } + // Distinguish single- and multiline comments as well as heredoc from operators + if ('//' === $token) { + yield new Token(null, 'comment', '//'.$next("\r\n"), $line); + continue; + } else if ('/*' === $token) { + $comment= ''; + do { + $chunk= $next('/'); + $comment.= $chunk; + } while (null !== $chunk && '*' !== $chunk[strlen($chunk) - 1]); + $comment.= $next('/'); + yield new Token(null, '*' === $comment[0] ? 'apidoc' : 'comment', '/*'.$comment, $line); + $line+= substr_count($comment, "\n"); + continue; + } else if ('<<<' === $token) { + $label= $next("\r\n"); + $end= trim($label, '"\''); + $l= strlen($end); + $string= "<<<{$label}"; + + heredoc: $token= $next("\r\n"); + if (0 === substr_compare($token, $end, $p= strspn($token, ' '), $l)) { + $p+= $l; + $offset-= strlen($token) - $p; + yield new Token($language->symbol('(literal)'), 'heredoc', $string.substr($token, 0, $p), $line); + $line+= substr_count($string, "\n"); + continue; + } else if (null === $token) { + throw new FormatException('Unclosed heredoc literal starting at line '.$line); + } + $string.= $token; + goto heredoc; + } + } yield new Token($language->symbol($token), 'operator', $token, $line); } else { yield new Token($language->symbols[$token] ?? $language->symbol('(name)'), 'name', $token, $line); diff --git a/src/test/php/lang/ast/unittest/TokensTest.class.php b/src/test/php/lang/ast/unittest/TokensTest.class.php index 267b45b..ca403c7 100755 --- a/src/test/php/lang/ast/unittest/TokensTest.class.php +++ b/src/test/php/lang/ast/unittest/TokensTest.class.php @@ -43,6 +43,11 @@ public function unclosed_string_literals($input) { (new Tokens($input))->iterator($this->language)->current(); } + #[Test, Expect(class: FormatException::class, message: '/Unclosed heredoc literal/'), Values(['<<iterator($this->language)->current(); + } + #[Test, Values(['0', '1', '1_000_000_000'])] public function integer_literal($input) { $this->assertTokens([['integer' => str_replace('_', '', $input)]], new Tokens($input)); diff --git a/src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php b/src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php index 1eb9273..cfc231d 100755 --- a/src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php +++ b/src/test/php/lang/ast/unittest/parse/LiteralsTest.class.php @@ -86,4 +86,56 @@ public function dangling_comma_in_key_value_map($declaration) { $pair= [new Literal('"key"', self::LINE), new Literal('"value"', self::LINE)]; $this->assertParsed([new ArrayLiteral([$pair], self::LINE)], $declaration); } + + #[Test, Values(['EOD', '"EOD"', "'EOD'"])] + public function heredoc($label) { + $nowdoc= ( + "<<<{$label}\n". + "Line 1\n". + "Line 2\n". + "\n". + "Line 4\n". + "EOD" + ); + $this->assertParsed([new Literal($nowdoc, self::LINE)], $nowdoc.';'); + } + + #[Test] + public function heredoc_indentation() { + $nowdoc= ( + "<<assertParsed([new Literal($nowdoc, self::LINE)], $nowdoc.';'); + } + + #[Test] + public function line_number_after_multiline_string() { + $string= ( + "'\n". + " ...\n". + "'" + ); + $this->assertParsed( + [new Literal($string, self::LINE), new Literal('null', self::LINE + 3)], + $string.";\nnull;" + ); + } + + #[Test] + public function line_number_after_heredoc() { + $nowdoc= ( + "<<assertParsed( + [new Literal($nowdoc, self::LINE), new Literal('null', self::LINE + 3)], + $nowdoc.";\nnull;" + ); + } } \ No newline at end of file